{
  "schemaVersion": "1.0",
  "item": {
    "slug": "search-intelligence-skill",
    "name": "Search Intelligence Skill",
    "source": "tencent",
    "type": "skill",
    "category": "开发工具",
    "sourceUrl": "https://clawhub.ai/elmaslouhymouaad/search-intelligence-skill",
    "canonicalUrl": "https://clawhub.ai/elmaslouhymouaad/search-intelligence-skill",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/search-intelligence-skill",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=search-intelligence-skill",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "README.md",
      "SKILL.md",
      "examples/usage.py",
      "requirements.txt",
      "search_dorks_skill/__init__.py",
      "search_dorks_skill/analyzer.py"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-05-07T17:22:31.273Z",
      "expiresAt": "2026-05-14T17:22:31.273Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=search-intelligence-skill",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=search-intelligence-skill",
        "contentDisposition": "attachment; filename=\"search-intelligence-skill-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/search-intelligence-skill"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/search-intelligence-skill",
    "agentPageUrl": "https://openagent3.xyz/skills/search-intelligence-skill/agent",
    "manifestUrl": "https://openagent3.xyz/skills/search-intelligence-skill/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/search-intelligence-skill/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "search-intelligence-skill",
        "body": "Use search-intelligence-skill to give any AI agent the ability to search the entire internet like an expert OSINT analyst, SEO engineer, and security researcher combined. All searches flow through your SearXNG instance — zero API keys, full privacy, 90+ engines.\n\nThe skill generates optimized dork queries, selects intelligent multi-step search strategies, translates operators across engines, routes queries to the best SearXNG engines, scores results by multi-signal relevance, and learns from results to refine searches automatically."
      },
      {
        "title": "Setup (once)",
        "body": "Install the package\n\n# From source (recommended)\ngit clone https://github.com/mouaad-ops/search-intelligence-skill.git\ncd search-intelligence-skill\npip install -e .\n\n# Or direct pip\npip install search-intelligence-skill # NOT yet working\n\nStart a SearXNG instance (if you don't have one)\n\n# Docker (quickest)\ndocker run -d \\\n  --name searxng \\\n  -p 8888:8080 \\\n  -e SEARXNG_SECRET=your-secret-key \\\n  searxng/searxng:latest\n\n# Verify it's running\ncurl http://localhost:8888/healthz\n\nEnable JSON API in SearXNG settings\n\n# In searxng/settings.yml — ensure search formats include json\nsearch:\n  formats:\n    - html\n    - json\n\nInitialize in code\n\nfrom search_intelligence_skill import SearchSkill\n\n# Default — localhost:8888\nskill = SearchSkill()\n\n# Custom instance\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    timeout=30.0,\n    max_retries=2,\n    rate_limit=0.5,\n    verify_ssl=True,\n    auto_refine=True,\n    max_refine_rounds=1,\n)\n\n# Verify connection\nif skill.health_check():\n    print(\"✓ SearXNG is reachable\")\nelse:\n    print(\"✗ Cannot reach SearXNG — check URL and port\")"
      },
      {
        "title": "Common Commands",
        "body": "Natural language search (the main interface)\n\nfrom search_intelligence_skill import SearchSkill\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\n\n# Just describe what you want — the skill handles everything:\n# intent detection, dork generation, engine selection, scoring\nreport = skill.search(\"find exposed .env files on example.com\")\n\n# Print LLM-ready formatted output\nprint(report.to_context())\n\n# Access structured results\nfor r in report.top(5):\n    print(f\"[{r.relevance:.1f}] {r.title}\")\n    print(f\"    {r.url}\")\n    print(f\"    {r.snippet[:200]}\")\n\nControl search depth\n\nfrom search_intelligence_skill import Depth\n\n# Quick — 1-2 queries, single step, fast lookup\nreport = skill.search(\"what is CORS\", depth=\"quick\")\n\n# Standard — 3-6 queries, multi-engine, good default\nreport = skill.search(\"python async frameworks comparison\", depth=\"standard\")\n\n# Deep — 6-12 queries, multi-step strategies, thorough research\nreport = skill.search(\"security audit of target.com\", depth=\"deep\")\n\n# Exhaustive — 12+ queries, full OSINT chains, complete sweep\nreport = skill.search(\"full recon on suspect-domain.com\", depth=\"exhaustive\")\n\nSecurity scanning — exposed files and panels\n\nreport = skill.search(\n    \"find exposed .env files, admin panels, and directory listings on example.com\",\n    depth=\"deep\",\n)\n\nprint(f\"Intent: {report.intent.category.value}/{report.intent.subcategory}\")\n# → Intent: security/exposed_files\n\nprint(f\"Strategy: {report.strategy.name}\")\n# → Strategy: multi_angle\n\nprint(f\"Results: {len(report.results)}\")\nfor r in report.top(10):\n    print(f\"  [{r.relevance:.1f}] {r.title} — {r.url}\")\n\nSecurity scanning — vulnerability research\n\n# CVE research\nreport = skill.search(\"CVE-2024-3094 xz backdoor exploit details\", depth=\"deep\")\n\n# Technology-specific vulnerabilities\nreport = skill.search(\n    \"Apache Struts remote code execution vulnerabilities 
2024\",\n    depth=\"standard\",\n)\n\n# Exposed API endpoints\nreport = skill.search(\n    \"find exposed swagger API docs on target.com\",\n    depth=\"deep\",\n)\n\n# Git repository exposure\nreport = skill.search(\n    \"exposed .git directories on example.com\",\n    depth=\"deep\",\n)\n\nOSINT investigation — people\n\n# By name\nreport = skill.search(\n    'OSINT investigation on \"John Doe\" — social media, email, profiles',\n    depth=\"deep\",\n)\n\n# By email\nreport = skill.search(\n    \"investigate john.doe@example.com — find all accounts and mentions\",\n    depth=\"exhaustive\",\n)\n\n# By username\nreport = skill.search(\n    \"find all accounts for username @johndoe42\",\n    depth=\"deep\",\n)\n\n# By phone number\nreport = skill.search(\n    \"lookup phone number +1-555-123-4567\",\n    depth=\"standard\",\n)\n\nOSINT investigation — domains and companies\n\n# Domain reconnaissance\nreport = skill.search(\n    \"full domain recon on target.com — subdomains, DNS, certificates, technology stack\",\n    depth=\"exhaustive\",\n)\n\n# Company investigation\nreport = skill.search(\n    'investigate company \"Acme Corp\" — employees, filings, data breaches',\n    depth=\"deep\",\n)\n\n# IP address lookup\nreport = skill.search(\n    \"investigate IP 192.168.1.1 — open ports, services, abuse reports\",\n    depth=\"standard\",\n)\n\nSEO analysis\n\n# Site indexation check\nreport = skill.search(\n    \"SEO indexation analysis of example.com\",\n    depth=\"standard\",\n)\n\n# Backlink research\nreport = skill.search(\n    \"find backlinks pointing to example.com\",\n    depth=\"deep\",\n)\n\n# Competitor analysis\nreport = skill.search(\n    \"SEO competitor analysis for example.com — related sites, ranking keywords\",\n    depth=\"deep\",\n)\n\n# Technical SEO audit\nreport = skill.search(\n    \"technical SEO check on example.com — sitemap, robots.txt, canonical, hreflang\",\n    depth=\"deep\",\n)\n\nAcademic research\n\n# Find papers\nreport = 
skill.search(\n    \"latest research papers on transformer architecture scaling laws 2024\",\n    depth=\"standard\",\n)\n\n# Find datasets\nreport = skill.search(\n    \"download dataset for sentiment analysis benchmark CSV\",\n    depth=\"standard\",\n)\n\n# Find authors and their work\nreport = skill.search(\n    'research publications by author \"Yann LeCun\" on deep learning',\n    depth=\"deep\",\n)\n\nCode and developer search\n\n# Find repositories\nreport = skill.search(\n    \"python library for PDF text extraction with OCR support\",\n    depth=\"standard\",\n)\n\n# Find packages\nreport = skill.search(\n    \"npm package for real-time WebSocket pub/sub\",\n    depth=\"standard\",\n)\n\n# Debug errors\nreport = skill.search(\n    \"RuntimeError: CUDA out of memory pytorch solution\",\n    depth=\"standard\",\n)\n\n# Find documentation\nreport = skill.search(\n    \"FastAPI dependency injection documentation examples\",\n    depth=\"quick\",\n)\n\nFile hunting\n\n# Find specific file types\nreport = skill.search(\n    \"machine learning cheat sheet filetype:pdf\",\n    depth=\"standard\",\n)\n\n# Find datasets\nreport = skill.search(\n    \"US census data 2023 download CSV\",\n    depth=\"standard\",\n)\n\n# Find configuration files\nreport = skill.search(\n    \"docker-compose example microservices filetype:yaml\",\n    depth=\"standard\",\n)\n\nNews search\n\n# Recent news\nreport = skill.search(\n    \"latest news on AI regulation this week\",\n    depth=\"standard\",\n)\n\n# Breaking news\nreport = skill.search(\n    \"breaking news today cybersecurity\",\n    depth=\"quick\",\n)\n\n# News analysis\nreport = skill.search(\n    \"analysis of EU AI Act implications for startups\",\n    depth=\"standard\",\n)\n\nImage and video search\n\n# Images\nreport = skill.search(\n    \"high resolution photos of Mars surface NASA\",\n    depth=\"standard\",\n)\n\n# Videos\nreport = skill.search(\n    \"video tutorial on Kubernetes deployment strategies\",\n    
depth=\"standard\",\n)\n\nSocial media search\n\n# Reddit discussions\nreport = skill.search(\n    \"reddit discussion about best self-hosted alternatives to Google Photos\",\n    depth=\"standard\",\n)\n\n# Forum threads\nreport = skill.search(\n    \"forum thread comparing Proxmox vs ESXi for home lab\",\n    depth=\"standard\",\n)\n\nDirect dork query (no intent parsing)\n\n# Execute a raw dork you've written yourself\nreport = skill.search_dork(\n    'site:github.com \"API_KEY\" filetype:env',\n    engines=[\"google\", \"bing\"],\n)\n\nprint(report.to_context())\n\nPreview queries without executing them\n\n# See what dork queries would be generated\ndorks = skill.suggest_queries(\n    \"find SQL injection vulnerabilities on target.com\"\n)\n\nfor d in dorks:\n    print(f\"  Query: {d.query}\")\n    print(f\"  Operators: {d.operators_used}\")\n    print(f\"  Purpose: {d.purpose}\")\n    print()\n\nBuild a custom dork from parameters\n\ndork = skill.build_dork(\n    keyword=\"confidential\",\n    domain=\"example.com\",\n    filetype=\"pdf\",\n    intitle=\"report\",\n    exclude=[\"public\", \"template\"],\n    exact_match=True,\n)\n\nprint(f\"Generated: {dork.query}\")\n# → site:example.com filetype:pdf intitle:\"report\" -public -template \"confidential\"\n\n# Execute it\nreport = skill.search_dork(dork.query)\n\nExecute a named strategy against a target\n\n# Full OSINT chain\nreport = skill.execute_strategy(\n    strategy_name=\"osint_chain\",\n    target=\"suspect-domain.com\",\n    depth=\"exhaustive\",\n)\n\n# Deep security dive\nreport = skill.execute_strategy(\n    strategy_name=\"deep_dive\",\n    target=\"target.com\",\n    depth=\"deep\",\n)\n\n# File hunting\nreport = skill.execute_strategy(\n    strategy_name=\"file_hunt\",\n    target=\"example.com\",\n    depth=\"deep\",\n)\n\n# Temporal trend analysis\nreport = skill.execute_strategy(\n    strategy_name=\"temporal\",\n    target=\"AI regulation news\",\n    depth=\"deep\",\n)\n\nBatch search — 
multiple queries at once\n\nqueries = [\n    \"python FastAPI vs Flask performance\",\n    \"rust web frameworks comparison 2024\",\n    \"go gin framework documentation\",\n]\n\nreports = skill.search_batch(queries, depth=\"quick\")\n\nfor report in reports:\n    print(f\"Query: {report.query}\")\n    print(f\"  Results: {len(report.results)}\")\n    print(f\"  Best: {report.top(1)[0].title if report.results else 'None'}\")\n    print()\n\nOverride engine and category selection\n\n# Force specific engines\nreport = skill.search(\n    \"quantum computing breakthroughs\",\n    engines=[\"google_scholar\", \"arxiv\", \"semantic_scholar\"],\n)\n\n# Force specific categories\nreport = skill.search(\n    \"kubernetes tutorial\",\n    categories=[\"it\", \"general\"],\n)\n\n# Force time range\nreport = skill.search(\n    \"zero-day vulnerabilities\",\n    time_range=\"week\",\n)\n\n# Force language\nreport = skill.search(\n    \"machine learning tutorials\",\n    language=\"en\",\n)\n\nWorking with the SearchReport object\n\nreport = skill.search(\"advanced persistent threats 2024\", depth=\"standard\")\n\n# LLM-ready text (for injecting into AI agent context)\ncontext = report.to_context(max_results=20)\n\n# Top N results sorted by relevance\ntop5 = report.top(5)\n\n# Full result list\nall_results = report.results\n\n# What was detected\nprint(f\"Intent: {report.intent.category.value}\")        # e.g. \"security\"\nprint(f\"Subcategory: {report.intent.subcategory}\")       # e.g. \"general\"\nprint(f\"Entities: {report.intent.entities}\")             # e.g. {\"year\": \"2024\"}\nprint(f\"Keywords: {report.intent.keywords}\")             # e.g. [\"advanced\", \"persistent\", \"threats\"]\nprint(f\"Confidence: {report.intent.confidence:.0%}\")     # e.g. \"80%\"\n\n# What strategy ran\nprint(f\"Strategy: {report.strategy.name}\")               # e.g. \"multi_angle\"\nprint(f\"Steps: {len(report.strategy.steps)}\")            # e.g. 
2\n\n# Performance metrics\nprint(f\"Total found: {report.total_found}\")              # before dedup\nprint(f\"Final results: {len(report.results)}\")           # after dedup+scoring\nprint(f\"Time: {report.timing_seconds:.2f}s\")\nprint(f\"Engines used: {report.engines_used}\")\n\n# Suggested refinements\nprint(f\"Suggestions: {report.suggestions}\")\n\n# Errors (if any)\nprint(f\"Errors: {report.errors}\")\n\nWorking with individual SearchResult objects\n\nfor r in report.top(10):\n    print(f\"Title:     {r.title}\")\n    print(f\"URL:       {r.url}\")\n    print(f\"Snippet:   {r.snippet[:300]}\")\n    print(f\"Relevance: {r.relevance:.2f} / 10.0\")\n    print(f\"Engines:   {r.engines}\")           # which SearXNG engines returned this\n    print(f\"Score:     {r.score}\")              # raw SearXNG score\n    print(f\"Category:  {r.category}\")           # SearXNG result category\n    print(f\"Positions: {r.positions}\")          # rank positions across engines\n    print(f\"Metadata:  {r.metadata}\")           # publishedDate, thumbnail, etc.\n    print()"
      },
      {
        "title": "AI Agent Integration",
        "body": "Basic tool handler\n\nfrom search_intelligence_skill import SearchSkill\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\n\ndef handle_search_tool(user_query: str) -> str:\n    \"\"\"Called by the AI agent when it needs to search the web.\"\"\"\n    report = skill.search(user_query, depth=\"standard\")\n    return report.to_context()\n\nWith depth control from agent\n\ndef handle_search_tool(user_query: str, depth: str = \"standard\") -> str:\n    report = skill.search(user_query, depth=depth)\n    return report.to_context()\n\nReturning structured data to agent\n\ndef handle_search_tool(user_query: str) -> dict:\n    report = skill.search(user_query, depth=\"standard\")\n    return {\n        \"query\": report.query,\n        \"intent\": f\"{report.intent.category.value}/{report.intent.subcategory}\",\n        \"confidence\": report.intent.confidence,\n        \"result_count\": len(report.results),\n        \"results\": [\n            {\n                \"title\": r.title,\n                \"url\": r.url,\n                \"snippet\": r.snippet[:500],\n                \"relevance\": round(r.relevance, 2),\n                \"engines\": r.engines,\n            }\n            for r in report.top(10)\n        ],\n        \"suggestions\": report.suggestions,\n        \"engines_used\": report.engines_used,\n        \"time_seconds\": round(report.timing_seconds, 2),\n    }\n\nOpenAI function calling / tool definition\n\nsearch_tool_schema = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"web_search\",\n        \"description\": (\n            \"Search the internet using advanced dork queries and multi-engine strategies. \"\n            \"Supports security scanning, OSINT, SEO analysis, academic research, \"\n            \"code search, file hunting, and general web search. 
\"\n            \"Describe what you want to find in natural language.\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"query\": {\n                    \"type\": \"string\",\n                    \"description\": \"Natural language search query describing what to find\",\n                },\n                \"depth\": {\n                    \"type\": \"string\",\n                    \"enum\": [\"quick\", \"standard\", \"deep\", \"exhaustive\"],\n                    \"description\": \"Search thoroughness: quick (1-2 queries), standard (3-6), deep (6-12), exhaustive (12+)\",\n                    \"default\": \"standard\",\n                },\n            },\n            \"required\": [\"query\"],\n        },\n    },\n}\n\nLangChain tool wrapper\n\nfrom langchain.tools import Tool\nfrom search_intelligence_skill import SearchSkill\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\n\nsearch_tool = Tool(\n    name=\"web_search\",\n    description=(\n        \"Advanced web search with dork generation and multi-engine strategies. \"\n        \"Input a natural language query. Supports security, OSINT, SEO, academic, \"\n        \"code, file, and general searches.\"\n    ),\n    func=lambda q: skill.search(q, depth=\"standard\").to_context(),\n)\n\nContext manager for clean resource handling\n\nwith SearchSkill(searxng_url=\"http://localhost:8888\") as skill:\n    report = skill.search(\"find open redirects on example.com\")\n    print(report.to_context())\n# HTTP client is automatically closed"
      },
      {
        "title": "Using Individual Components Directly",
        "body": "IntentParser — analyze queries without searching\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\nintent = parser.parse(\"find exposed .env files on example.com\")\n\nprint(f\"Category:    {intent.category.value}\")     # security\nprint(f\"Subcategory: {intent.subcategory}\")         # exposed_files\nprint(f\"Entities:    {intent.entities}\")            # {\"domain\": \"example.com\"}\nprint(f\"Keywords:    {intent.keywords}\")            # [\"exposed\", \"env\", \"files\"]\nprint(f\"Depth:       {intent.depth.value}\")         # standard\nprint(f\"Time range:  {intent.time_range}\")          # \"\"\nprint(f\"Confidence:  {intent.confidence:.0%}\")      # 95%\nprint(f\"Constraints: {intent.constraints}\")         # {}\n\nDorkGenerator — generate queries without searching\n\nfrom search_intelligence_skill import DorkGenerator, IntentParser\n\nparser = IntentParser()\ngen = DorkGenerator()\n\nintent = parser.parse(\"OSINT investigation on john@example.com\")\ndorks = gen.generate(intent)\n\nfor d in dorks:\n    print(f\"  [{', '.join(d.operators_used)}] {d.query}\")\n    print(f\"  Purpose: {d.purpose}\")\n\n# Build a custom dork manually\ncustom = gen.generate_custom(\n    keyword=\"secret\",\n    domain=\"example.com\",\n    filetype=\"env\",\n    intitle=\"config\",\n    exclude=[\"test\", \"demo\"],\n    exact_match=True,\n)\nprint(f\"Custom: {custom.query}\")\n\n# Translate a Google dork to Yandex syntax\nyandex_dork = gen.translate(custom, target_engine=\"yandex\")\nprint(f\"Yandex: {yandex_dork.query}\")\n\n# Translate to Bing\nbing_dork = gen.translate(custom, target_engine=\"bing\")\nprint(f\"Bing:   {bing_dork.query}\")\n\nResultAnalyzer — score and analyze results\n\nfrom search_intelligence_skill import ResultAnalyzer, IntentParser, SearXNGClient\n\nclient = SearXNGClient(base_url=\"http://localhost:8888\")\nparser = IntentParser()\nanalyzer = ResultAnalyzer()\n\nintent = parser.parse(\"python web 
frameworks comparison\")\n\nraw = client.search(\"python web frameworks comparison\", engines=[\"google\", \"bing\"])\nresults = client.parse_results(raw)\n\n# Full analysis pipeline: deduplicate → score → sort\nanalyzed = analyzer.analyze(results, intent)\n\nfor r in analyzed[:5]:\n    print(f\"[{r.relevance:.2f}] {r.title}\")\n\n# Generate refinement suggestions\nsuggestions = analyzer.generate_refinements(analyzed, intent)\nprint(f\"Suggestions: {suggestions}\")\n\n# Get a text summary\nsummary = analyzer.summarize(analyzed, intent)\nprint(summary)\n\nclient.close()\n\nSearXNGClient — direct API access\n\nfrom search_intelligence_skill import SearXNGClient\n\nclient = SearXNGClient(base_url=\"http://localhost:8888\")\n\n# Single search\nraw = client.search(\n    query='site:github.com \"fastapi\" filetype:py',\n    engines=[\"google\", \"bing\", \"duckduckgo\"],\n    categories=[\"general\"],\n    time_range=\"month\",\n    language=\"en\",\n    pageno=1,\n    safesearch=0,\n)\n\n# Parse results into SearchResult objects\nresults = client.parse_results(raw)\n\n# Get SearXNG suggestions\nsuggestions = client.get_suggestions(raw)\n\n# Get spelling corrections\ncorrections = client.get_corrections(raw)\n\n# See which engines failed\nunresponsive = client.get_unresponsive(raw)\n\n# Batch search\nresponses = client.search_batch(\n    queries=[\"query 1\", \"query 2\", \"query 3\"],\n    engines=[\"google\"],\n)\n\n# Health check\nif client.health_check():\n    print(\"SearXNG is online\")\n\nclient.close()"
      },
      {
        "title": "Quick Reference",
        "body": "Search Depths\n\nfrom search_intelligence_skill import Depth\n\nDepth.QUICK        # 1-2 queries, single step, fast lookups\nDepth.STANDARD     # 3-6 queries, multi-engine, general searching\nDepth.DEEP         # 6-12 queries, multi-step, thorough research\nDepth.EXHAUSTIVE   # 12+ queries, full sweep, complete investigations\n\nIntent Categories (auto-detected)\n\nfrom search_intelligence_skill import IntentCategory\n\nIntentCategory.GENERAL    # General web search\nIntentCategory.SECURITY   # Vulnerabilities, exposed files, pentesting\nIntentCategory.SEO        # Indexation, backlinks, competitors, technical SEO\nIntentCategory.OSINT      # People, emails, usernames, domains, companies\nIntentCategory.ACADEMIC   # Papers, datasets, authors, journals\nIntentCategory.CODE       # Repositories, packages, docs, bugs\nIntentCategory.FILES      # Documents, data files, archives, media\nIntentCategory.NEWS       # Breaking news, analysis, trends\nIntentCategory.IMAGES     # Image search\nIntentCategory.VIDEOS     # Video search\nIntentCategory.SOCIAL     # Reddit, forums, discussions\nIntentCategory.SHOPPING   # Products, prices, comparisons\nIntentCategory.LEGAL      # Law, regulations, patents\nIntentCategory.MEDICAL    # Health, diseases, clinical research\n\nSearch Strategies (auto-selected by depth + intent)\n\n# Strategies are selected automatically, but you can also invoke them directly:\nskill.execute_strategy(\"quick\", target=\"example.com\")           # 1 step, top engines\nskill.execute_strategy(\"broad_to_narrow\", target=\"example.com\") # Wide then focused\nskill.execute_strategy(\"multi_angle\", target=\"example.com\")     # Same topic, different formulations\nskill.execute_strategy(\"deep_dive\", target=\"example.com\")       # Exhaustive dork coverage\nskill.execute_strategy(\"osint_chain\", target=\"example.com\")     # Progressive recon\nskill.execute_strategy(\"verify\", target=\"some claim\")           # Cross-reference 
sources\nskill.execute_strategy(\"file_hunt\", target=\"example.com\")       # Targeted file search\nskill.execute_strategy(\"temporal\", target=\"AI news\")            # Across time periods\n\nSupported SearXNG Engines (90+)\n\n# General: google, bing, duckduckgo, brave, qwant, startpage, mojeek,\n#          yandex, yahoo, presearch, wiby, stract, yep, baidu, naver ...\n#\n# IT/Dev:  github, stackoverflow, gitlab, npm, pypi, dockerhub,\n#          arch_linux_wiki, crates_io, packagist, pkg_go_dev ...\n#\n# Science: arxiv, google_scholar, semantic_scholar, crossref, pubmed,\n#          base, openalex, core, wolfram_alpha ...\n#\n# News:    google_news, bing_news, yahoo_news, brave_news, wikinews ...\n#\n# Social:  reddit, lemmy, mastodon, hacker_news, lobsters ...\n#\n# Images:  google_images, bing_images, flickr, unsplash, openverse ...\n#\n# Videos:  youtube, google_videos, dailymotion, vimeo, piped, odysee ...\n#\n# Files:   piratebay, 1337x, annas_archive, z_library ...\n#\n# Music:   bandcamp, genius, soundcloud, youtube_music ...\n#\n# Maps:    openstreetmap, photon ...\n#\n# Wikis:   wikipedia, wikidata, wikimedia_commons ...\n\nDork Operators (auto-translated across engines)\n\n# Google operators:\n#   site:  filetype:  intitle:  allintitle:  inurl:  allinurl:\n#   intext:  allintext:  inanchor:  cache:  related:  info:  define:\n#   before:  after:  AROUND(N)  \"exact\"  -exclude  OR  *  N..M\n#\n# Bing operators:\n#   site:  filetype:  intitle:  inurl:  inbody:  contains:  ip:\n#   language:  loc:  prefer:  feed:  \"exact\"  -exclude  OR  NEAR:N\n#\n# DuckDuckGo operators:\n#   site:  filetype:  intitle:  inurl:  \"exact\"  -exclude  OR\n#\n# Yandex operators:\n#   site:  mime:  title:  inurl:  host:  domain:  lang:  date:\n#   \"exact\"  -exclude  |\n#\n# Brave operators:\n#   site:  filetype:  intitle:  inurl:  \"exact\"  -exclude  OR\n#\n# The skill auto-translates between engines:\n#   filetype: → mime: (Yandex)\n#   intitle:  → title: (Yandex)\n#   
intext:   → inbody: (Bing)"
      },
      {
        "title": "Dork Template Library",
        "body": "Security dorks available (by subcategory)\n\nexposed_files      — .env, .log, .sql, .bak, .conf, .pem, .key, .json\ndirectory_listing  — \"index of\", \"directory listing\", \"parent directory\"\nadmin_panels       — /admin, /login, /dashboard, wp-admin, phpmyadmin, cpanel\nsensitive_data     — passwords, RSA keys, AWS keys, database URLs, SMTP creds\nexposed_apis       — /api/, swagger, api-docs, graphql, openapi\nsubdomains         — site:*.domain, external references, inurl:domain\ngit_exposed        — .git, .git/config, .svn, .hg\ntechnology_stack   — \"powered by\", wp-content, X-Powered-By\ngeneral            — CVE, exploit, PoC, security advisory\n\nOSINT dorks available (by subcategory)\n\nperson    — LinkedIn, Twitter/X, Facebook, Instagram, GitHub, Medium, resume, CV\nemail     — email mentions, cross-site, leaks, LinkedIn, GitHub\nusername  — GitHub, Reddit, Twitter, Instagram, YouTube, Keybase, StackOverflow\ndomain    — site:, subdomains, whois, Shodan, DNS, SSL, Censys, crt.sh\ncompany   — LinkedIn company, Crunchbase, Glassdoor, SEC filings, employees\nphone     — whitepages, truecaller, Facebook, name/address\nip        — Shodan, abuse/blacklist, open ports, whois\n\nSEO dorks available (by subcategory)\n\nindexation     — site:, sitemap, blog, tag/category pages\nbacklinks      — external mentions, anchor text, link:\ncompetitors    — related:, same-keyword competitors\ncontent_audit  — intitle/inurl/intext keyword matching\ntechnical_seo  — sitemap XML, robots.txt, noindex, canonical, hreflang, schema\n\nAcademic dorks available (by subcategory)\n\npapers   — arxiv, ResearchGate, academia.edu, DOI, .edu PDFs\ndatasets — CSV, JSON, Kaggle, HuggingFace, Zenodo\nauthors  — Google Scholar, ORCID, ResearchGate, publication lists\n\nCode dorks available (by subcategory)\n\nrepositories  — GitHub, GitLab, Bitbucket, Codeberg, Sourcehut\npackages      — npm, PyPI, crates.io, RubyGems, Packagist, pkg.go.dev\ndocumentation — ReadTheDocs, 
README, API references\nissues_bugs   — GitHub issues, StackOverflow errors"
      },
      {
        "title": "Advanced Usage",
        "body": "Cross-engine dork translation\n\nfrom search_intelligence_skill import DorkGenerator\n\ngen = DorkGenerator()\n\n# Build a Google dork\ndork = gen.generate_custom(\n    keyword=\"secret\",\n    domain=\"example.com\",\n    filetype=\"env\",\n    intitle=\"config\",\n)\nprint(f\"Google: {dork.query}\")\n# → site:example.com filetype:env intitle:\"config\" secret\n\n# Translate to Yandex (filetype → mime, intitle → title)\nyandex = gen.translate(dork, \"yandex\")\nprint(f\"Yandex: {yandex.query}\")\n# → site:example.com mime:env title:\"config\" secret\n\n# Translate to Bing\nbing = gen.translate(dork, \"bing\")\nprint(f\"Bing:   {bing.query}\")\n\n# Translate to DuckDuckGo (drops unsupported operators)\nddg = gen.translate(dork, \"duckduckgo\")\nprint(f\"DDG:    {ddg.query}\")\n\n# Translate to an engine without operator support (strips all operators)\nplain = gen.translate(dork, \"wikipedia\")\nprint(f\"Plain:  {plain.query}\")\n\nResult scoring details\n\n# Each result is scored on 7 signals (0-10 scale):\n#\n# 1. SearXNG base score (normalized)        — weight: 2.0\n# 2. Keyword match in title + snippet       — weight: 3.0\n# 3. Multi-engine agreement (appeared in N)  — weight: 0.5/engine, max 2.0\n# 4. Position rank (lower = better)          — weight: 1.5\n# 5. Source credibility (.gov +1.5, .edu +1.4, arxiv +1.4, etc.)\n# 6. Content quality (snippet length, HTTPS, URL sanity)\n# 7. Intent-specific boost (arxiv for academic, github for code, etc.)\n#\n# Credibility penalties: spam (-0.7), \"click here\" (-0.5), \"free download\" (-0.4)\n\nAuto-refinement behavior\n\n# When auto_refine=True (default) and results < 5:\n# 1. Analyzer generates refined queries (broader, different keywords)\n# 2. Skill executes up to 3 refinement queries\n# 3. New results are merged with originals\n# 4. Full dedup + re-scoring runs\n# 5. 
Process repeats up to max_refine_rounds\n\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    auto_refine=True,\n    max_refine_rounds=2,  # Try refining up to 2 times\n)\n\n# Disable auto-refinement for speed-critical paths\nskill_fast = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    auto_refine=False,\n)\n\nEntity extraction capabilities\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\n\n# Domains\nintent = parser.parse(\"scan example.com for vulnerabilities\")\n# entities: {\"domain\": \"example.com\"}\n\n# Emails\nintent = parser.parse(\"investigate user@company.com\")\n# entities: {\"email\": \"user@company.com\", \"email_domain\": \"company.com\"}\n\n# IPs\nintent = parser.parse(\"lookup 192.168.1.1\")\n# entities: {\"ip\": \"192.168.1.1\"}\n\n# CVEs\nintent = parser.parse(\"details on CVE-2024-3094\")\n# entities: {\"cve\": \"CVE-2024-3094\"}\n\n# Phone numbers\nintent = parser.parse(\"find owner of +1-555-123-4567\")\n# entities: {\"phone\": \"+1-555-123-4567\"}\n\n# Usernames\nintent = parser.parse(\"find accounts for @johndoe42\")\n# entities: {\"username\": \"johndoe42\"}\n\n# Names (quoted)\nintent = parser.parse('investigate \"John Smith\"')\n# entities: {\"name\": \"John Smith\"}\n\n# Names (capitalized pattern)\nintent = parser.parse(\"find information about Jane Doe\")\n# entities: {\"name\": \"Jane Doe\"}\n\n# File types\nintent = parser.parse(\"find documents filetype:pdf\")\n# entities: {\"filetype\": \"pdf\"}\n\n# Years\nintent = parser.parse(\"research papers from 2024\")\n# entities: {\"year\": \"2024\"}\n\n# Multiple entities combined\nintent = parser.parse('CVE-2024-3094 on example.com \"John Doe\"')\n# entities: {\"cve\": \"CVE-2024-3094\", \"domain\": \"example.com\", \"name\": \"John Doe\"}\n\nTime range detection\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\n\nparser.parse(\"news today\").time_range                  # 
\"day\"\nparser.parse(\"what happened this week\").time_range     # \"week\"\nparser.parse(\"articles from last month\").time_range    # \"month\"\nparser.parse(\"publications this year\").time_range      # \"year\"\nparser.parse(\"latest updates on AI\").time_range        # \"month\" (heuristic)\nparser.parse(\"history of computing\").time_range        # \"\" (no time constraint)\n\nConstraint extraction\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\n\n# Language constraints\nintent = parser.parse(\"machine learning tutorials in spanish\")\n# constraints: {\"language\": \"es\"}\n\n# Exhaustive hints\nintent = parser.parse(\"find everything about this vulnerability\")\n# constraints: {\"exhaustive\": True}\n\n# Result limits\nintent = parser.parse(\"top 20 python frameworks\")\n# constraints: {\"limit\": 20}\n\n# Exclusion hints\nintent = parser.parse(\"web frameworks except Django without Flask\")\n# constraints: {\"exclude\": [\"django\", \"flask\"]}\n\nPagination\n\nfrom search_intelligence_skill import SearXNGClient\n\nclient = SearXNGClient(base_url=\"http://localhost:8888\")\n\n# Fetch multiple pages\nall_results = []\nfor page in range(1, 4):\n    raw = client.search(\"python frameworks\", pageno=page)\n    results = client.parse_results(raw)\n    all_results.extend(results)\n    if not results:\n        break\n\nprint(f\"Total across 3 pages: {len(all_results)}\")\nclient.close()\n\nRate limiting and retries\n\n# Built-in rate limiting between requests\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    rate_limit=1.0,    # 1 second minimum between requests\n    max_retries=3,     # Retry failed requests up to 3 times\n    timeout=30.0,      # 30 second timeout per request\n)\n\n# Rate limiting is automatic — no manual sleep() needed\n# Retries use increasing delays on 429 (Too Many Requests)\n\nLogging for debugging\n\nimport logging\n\n# See everything the skill 
does\nlogging.basicConfig(level=logging.DEBUG)\n\n# Or just info-level\nlogging.basicConfig(level=logging.INFO)\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\nreport = skill.search(\"test query\", depth=\"standard\")\n\n# Logs will show:\n# INFO — Intent: security/exposed_files (confidence=0.95) — entities: {\"domain\": \"...\"}\n# INFO — Strategy: multi_angle — 2 steps\n# DEBUG — Executing step 1: Search angle 1\n# DEBUG — Search 'site:... filetype:env' returned 12 results\n# DEBUG — Executing step 2: Search angle 2\n# INFO — Search complete: 23 results, 4.21s, 4 engines"
      },
      {
        "title": "API Methods",
        "body": "Method — Purpose — Returns\n\nskill.search(query, depth, ...) — Full intelligent search pipeline — SearchReport\nskill.search_dork(dork, ...) — Execute raw dork query directly — SearchReport\nskill.suggest_queries(query) — Preview dorks without executing — list[DorkQuery]\nskill.build_dork(keyword, ...) — Build custom dork from parameters — DorkQuery\nskill.execute_strategy(name, target) — Run named strategy against target — SearchReport\nskill.search_batch(queries, ...) — Execute multiple searches — list[SearchReport]\nskill.health_check() — Check SearXNG connectivity — bool\nskill.close() — Close HTTP client — None"
      },
      {
        "title": "SearchReport Properties",
        "body": "Property — Type — Description\n\n.query — str — Original natural language query\n.intent — SearchIntent — Parsed intent with category, entities, keywords\n.strategy — SearchStrategy — Strategy that was used (name, steps)\n.results — list[SearchResult] — Scored and deduplicated results\n.total_found — int — Total results before deduplication\n.suggestions — list[str] — Refinement suggestions\n.refined_queries — list[str] — Auto-refinement queries used\n.errors — list[str] — Errors encountered during search\n.timing_seconds — float — Total wall-clock time\n.engines_used — list[str] — Engines that returned results\n.to_context(max_results) — str — LLM-formatted text output\n.top(n) — list[SearchResult] — Top N by relevance score"
      },
      {
        "title": "SearchResult Properties",
        "body": "Property — Type — Description\n\n.title — str — Result title\n.url — str — Result URL\n.snippet — str — Content snippet / description\n.engines — list[str] — Which SearXNG engines returned it\n.score — float — Raw SearXNG score\n.relevance — float — Computed multi-signal relevance (0-10)\n.category — str — SearXNG result category\n.positions — list[int] — Rank positions across engines\n.metadata — dict — Extra fields: publishedDate, thumbnail, img_src"
      },
      {
        "title": "Troubleshooting",
        "body": "SearXNG not reachable\n\n# Check the instance is running\ncurl http://localhost:8888/healthz\n\n# Check JSON API is enabled\ncurl \"http://localhost:8888/search?q=test&format=json\"\n\n# Common fixes:\n# 1. Ensure port mapping is correct (docker: -p 8888:8080)\n# 2. Ensure search.formats includes \"json\" in settings.yml\n# 3. Check firewall rules\n\nif not skill.health_check():\n    print(\"SearXNG unreachable — check URL, port, and settings\")\n\nNo results returned\n\nreport = skill.search(\"very specific obscure query\")\n\nif not report.results:\n    print(\"No results. Try:\")\n    print(\"  1. Broader keywords\")\n    print(\"  2. Different depth: depth='deep'\")\n    print(\"  3. Check suggestions:\", report.suggestions)\n    print(\"  4. Check errors:\", report.errors)\n    print(\"  5. Try different engines:\", report.engines_used)\n\n    # Manual broader search\n    report2 = skill.search(\"broader version of query\", depth=\"deep\")\n\nTimeout errors\n\n# Increase timeout for complex queries\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    timeout=60.0,      # 60 seconds\n    max_retries=3,     # More retries\n)\n\nRate limiting (429 errors)\n\n# Increase delay between requests\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    rate_limit=2.0,    # 2 seconds between requests\n)\n\nSSL errors (local development only)\n\nskill = SearchSkill(\n    searxng_url=\"https://localhost:8888\",\n    verify_ssl=False,  # ONLY for local dev — never in production\n)\n\nWrong intent detected\n\n# If the auto-detection picks the wrong category, use direct dork:\nreport = skill.search_dork(\n    'site:example.com filetype:pdf \"annual report\"',\n    engines=[\"google\", \"bing\"],\n)\n\n# Or force engines/categories:\nreport = skill.search(\n    \"some ambiguous query\",\n    engines=[\"google_scholar\", \"arxiv\"],\n    categories=[\"science\"],\n)\n\nMemory usage with large result sets\n\n# Limit results to 
control memory\nreport = skill.search(\"broad query\", depth=\"exhaustive\", max_results=50)\n\n# Process results in a streaming fashion\nfor r in report.results:\n    process(r)  # handle one at a time"
      },
      {
        "title": "How It All Works Together",
        "body": "User Query\n    │\n    ▼\n┌─────────────────┐\n│  IntentParser    │──→ category, subcategory, entities, keywords\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│  DorkGenerator   │──→ 5-20 optimized dork queries with operators\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│ StrategyPlanner  │──→ multi-step plan (which dorks, which engines, what order)\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│  SearXNGClient   │──→ executes queries against your instance (retries, rate limit)\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│ ResultAnalyzer   │──→ dedup, score, rank, credibility check\n└────────┬────────┘\n         │\n    (if results poor)\n         │\n         ▼\n┌─────────────────┐\n│  Auto-Refine     │──→ generate new queries, re-search, re-analyze\n└────────┬────────┘\n         │\n         ▼\n   SearchReport\n  .to_context() → LLM-ready text\n  .top(n) → best results\n  .results → full list"
      },
      {
        "title": "Notes",
        "body": "Privacy\n\nAll searches route through YOUR SearXNG instance\nZero API keys required for any engine\nNo data sent to third-party services (except through SearXNG's engine requests)\nSearXNG strips tracking parameters and anonymizes requests\n\nPerformance tips\n\nReuse the SearchSkill instance across searches (connection pooling)\nUse depth=\"quick\" for simple lookups, reserve \"deep\" / \"exhaustive\" for research\nSet auto_refine=False for speed-critical paths\nUse skill.suggest_queries() to preview before executing expensive searches\nBatch independent queries with skill.search_batch()\n\nAccuracy tips\n\nInclude specific entities in your query (domains, emails, CVEs, names)\nUse quoted phrases for exact matching: 'find \"exact phrase\"'\nSpecify time ranges when freshness matters: \"latest news this week\"\nUse depth=\"deep\" or \"exhaustive\" for comprehensive coverage\nCheck report.suggestions for refinement ideas\nCheck report.intent to verify the skill understood your query correctly\n\nExtending the skill\n\nAdd new dork templates in config.py → DORK_TEMPLATES\nAdd new intent signals in config.py → INTENT_SIGNALS\nAdd new engines in config.py → ENGINE_CATEGORIES\nAdd new operator translations in config.py → OPERATOR_SUPPORT\nAdd new strategies in config.py → STRATEGY_DEFINITIONS\nAdd new subcategory detection in intent.py → SUBCATEGORY_PATTERNS\n\nConfirm before sensitive operations\n\nSecurity scanning dorks may trigger alerts on target domains\nOSINT queries may involve personal information — use responsibly\nAlways validate that the target domain/entity is authorized for testing\nThis tool is for legitimate research, authorized security testing, and SEO analysis"
      }
    ],
    "body": "search-intelligence-skill\n\nUse search-intelligence-skill to give any AI agent the ability to search the entire internet like an expert OSINT analyst, SEO engineer, and security researcher combined. All searches flow through your SearXNG instance — zero API keys, full privacy, 90+ engines.\n\nThe skill generates optimized dork queries, selects intelligent multi-step search strategies, translates operators across engines, routes queries to the best SearXNG engines, scores results by multi-signal relevance, and learns from results to refine searches automatically.\n\nSetup (once)\n\nInstall the package\n\n# From source (recommended)\ngit clone https://github.com/mouaad-ops/search-intelligence-skill.git\ncd search-intelligence-skill\npip install -e .\n\n# Or direct pip\npip install search-intelligence-skill # NOT yet working\n\n\nStart a SearXNG instance (if you don't have one)\n\n# Docker (quickest)\ndocker run -d \\\n  --name searxng \\\n  -p 8888:8080 \\\n  -e SEARXNG_SECRET=your-secret-key \\\n  searxng/searxng:latest\n\n# Verify it's running\ncurl http://localhost:8888/healthz\n\n\nEnable JSON API in SearXNG settings\n\n# In searxng/settings.yml — ensure search formats include json\nsearch:\n  formats:\n    - html\n    - json\n\n\nInitialize in code\n\nfrom search_intelligence_skill import SearchSkill\n\n# Default — localhost:8888\nskill = SearchSkill()\n\n# Custom instance\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    timeout=30.0,\n    max_retries=2,\n    rate_limit=0.5,\n    verify_ssl=True,\n    auto_refine=True,\n    max_refine_rounds=1,\n)\n\n# Verify connection\nif skill.health_check():\n    print(\"✓ SearXNG is reachable\")\nelse:\n    print(\"✗ Cannot reach SearXNG — check URL and port\")\n\nCommon Commands\n\nNatural language search (the main interface)\n\nfrom search_intelligence_skill import SearchSkill\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\n\n# Just describe what you want — the skill 
handles everything:\n# intent detection, dork generation, engine selection, scoring\nreport = skill.search(\"find exposed .env files on example.com\")\n\n# Print LLM-ready formatted output\nprint(report.to_context())\n\n# Access structured results\nfor r in report.top(5):\n    print(f\"[{r.relevance:.1f}] {r.title}\")\n    print(f\"    {r.url}\")\n    print(f\"    {r.snippet[:200]}\")\n\n\nControl search depth\n\nfrom search_intelligence_skill import Depth\n\n# Quick — 1-2 queries, single step, fast lookup\nreport = skill.search(\"what is CORS\", depth=\"quick\")\n\n# Standard — 3-6 queries, multi-engine, good default\nreport = skill.search(\"python async frameworks comparison\", depth=\"standard\")\n\n# Deep — 6-12 queries, multi-step strategies, thorough research\nreport = skill.search(\"security audit of target.com\", depth=\"deep\")\n\n# Exhaustive — 12+ queries, full OSINT chains, complete sweep\nreport = skill.search(\"full recon on suspect-domain.com\", depth=\"exhaustive\")\n\n\nSecurity scanning — exposed files and panels\n\nreport = skill.search(\n    \"find exposed .env files, admin panels, and directory listings on example.com\",\n    depth=\"deep\",\n)\n\nprint(f\"Intent: {report.intent.category.value}/{report.intent.subcategory}\")\n# → Intent: security/exposed_files\n\nprint(f\"Strategy: {report.strategy.name}\")\n# → Strategy: multi_angle\n\nprint(f\"Results: {len(report.results)}\")\nfor r in report.top(10):\n    print(f\"  [{r.relevance:.1f}] {r.title} — {r.url}\")\n\n\nSecurity scanning — vulnerability research\n\n# CVE research\nreport = skill.search(\"CVE-2024-3094 xz backdoor exploit details\", depth=\"deep\")\n\n# Technology-specific vulnerabilities\nreport = skill.search(\n    \"Apache Struts remote code execution vulnerabilities 2024\",\n    depth=\"standard\",\n)\n\n# Exposed API endpoints\nreport = skill.search(\n    \"find exposed swagger API docs on target.com\",\n    depth=\"deep\",\n)\n\n# Git repository exposure\nreport = 
skill.search(\n    \"exposed .git directories on example.com\",\n    depth=\"deep\",\n)\n\n\nOSINT investigation — people\n\n# By name\nreport = skill.search(\n    'OSINT investigation on \"John Doe\" — social media, email, profiles',\n    depth=\"deep\",\n)\n\n# By email\nreport = skill.search(\n    \"investigate john.doe@example.com — find all accounts and mentions\",\n    depth=\"exhaustive\",\n)\n\n# By username\nreport = skill.search(\n    \"find all accounts for username @johndoe42\",\n    depth=\"deep\",\n)\n\n# By phone number\nreport = skill.search(\n    \"lookup phone number +1-555-123-4567\",\n    depth=\"standard\",\n)\n\n\nOSINT investigation — domains and companies\n\n# Domain reconnaissance\nreport = skill.search(\n    \"full domain recon on target.com — subdomains, DNS, certificates, technology stack\",\n    depth=\"exhaustive\",\n)\n\n# Company investigation\nreport = skill.search(\n    'investigate company \"Acme Corp\" — employees, filings, data breaches',\n    depth=\"deep\",\n)\n\n# IP address lookup\nreport = skill.search(\n    \"investigate IP 192.168.1.1 — open ports, services, abuse reports\",\n    depth=\"standard\",\n)\n\n\nSEO analysis\n\n# Site indexation check\nreport = skill.search(\n    \"SEO indexation analysis of example.com\",\n    depth=\"standard\",\n)\n\n# Backlink research\nreport = skill.search(\n    \"find backlinks pointing to example.com\",\n    depth=\"deep\",\n)\n\n# Competitor analysis\nreport = skill.search(\n    \"SEO competitor analysis for example.com — related sites, ranking keywords\",\n    depth=\"deep\",\n)\n\n# Technical SEO audit\nreport = skill.search(\n    \"technical SEO check on example.com — sitemap, robots.txt, canonical, hreflang\",\n    depth=\"deep\",\n)\n\n\nAcademic research\n\n# Find papers\nreport = skill.search(\n    \"latest research papers on transformer architecture scaling laws 2024\",\n    depth=\"standard\",\n)\n\n# Find datasets\nreport = skill.search(\n    \"download dataset for sentiment 
analysis benchmark CSV\",\n    depth=\"standard\",\n)\n\n# Find authors and their work\nreport = skill.search(\n    'research publications by author \"Yann LeCun\" on deep learning',\n    depth=\"deep\",\n)\n\n\nCode and developer search\n\n# Find repositories\nreport = skill.search(\n    \"python library for PDF text extraction with OCR support\",\n    depth=\"standard\",\n)\n\n# Find packages\nreport = skill.search(\n    \"npm package for real-time WebSocket pub/sub\",\n    depth=\"standard\",\n)\n\n# Debug errors\nreport = skill.search(\n    \"RuntimeError: CUDA out of memory pytorch solution\",\n    depth=\"standard\",\n)\n\n# Find documentation\nreport = skill.search(\n    \"FastAPI dependency injection documentation examples\",\n    depth=\"quick\",\n)\n\n\nFile hunting\n\n# Find specific file types\nreport = skill.search(\n    \"machine learning cheat sheet filetype:pdf\",\n    depth=\"standard\",\n)\n\n# Find datasets\nreport = skill.search(\n    \"US census data 2023 download CSV\",\n    depth=\"standard\",\n)\n\n# Find configuration files\nreport = skill.search(\n    \"docker-compose example microservices filetype:yaml\",\n    depth=\"standard\",\n)\n\n\nNews search\n\n# Recent news\nreport = skill.search(\n    \"latest news on AI regulation this week\",\n    depth=\"standard\",\n)\n\n# Breaking news\nreport = skill.search(\n    \"breaking news today cybersecurity\",\n    depth=\"quick\",\n)\n\n# News analysis\nreport = skill.search(\n    \"analysis of EU AI Act implications for startups\",\n    depth=\"standard\",\n)\n\n\nImage and video search\n\n# Images\nreport = skill.search(\n    \"high resolution photos of Mars surface NASA\",\n    depth=\"standard\",\n)\n\n# Videos\nreport = skill.search(\n    \"video tutorial on Kubernetes deployment strategies\",\n    depth=\"standard\",\n)\n\n\nSocial media search\n\n# Reddit discussions\nreport = skill.search(\n    \"reddit discussion about best self-hosted alternatives to Google Photos\",\n    
depth=\"standard\",\n)\n\n# Forum threads\nreport = skill.search(\n    \"forum thread comparing Proxmox vs ESXi for home lab\",\n    depth=\"standard\",\n)\n\n\nDirect dork query (no intent parsing)\n\n# Execute a raw dork you've written yourself\nreport = skill.search_dork(\n    'site:github.com \"API_KEY\" filetype:env',\n    engines=[\"google\", \"bing\"],\n)\n\nprint(report.to_context())\n\n\nPreview queries without executing them\n\n# See what dork queries would be generated\ndorks = skill.suggest_queries(\n    \"find SQL injection vulnerabilities on target.com\"\n)\n\nfor d in dorks:\n    print(f\"  Query: {d.query}\")\n    print(f\"  Operators: {d.operators_used}\")\n    print(f\"  Purpose: {d.purpose}\")\n    print()\n\n\nBuild a custom dork from parameters\n\ndork = skill.build_dork(\n    keyword=\"confidential\",\n    domain=\"example.com\",\n    filetype=\"pdf\",\n    intitle=\"report\",\n    exclude=[\"public\", \"template\"],\n    exact_match=True,\n)\n\nprint(f\"Generated: {dork.query}\")\n# → site:example.com filetype:pdf intitle:\"report\" -public -template \"confidential\"\n\n# Execute it\nreport = skill.search_dork(dork.query)\n\n\nExecute a named strategy against a target\n\n# Full OSINT chain\nreport = skill.execute_strategy(\n    strategy_name=\"osint_chain\",\n    target=\"suspect-domain.com\",\n    depth=\"exhaustive\",\n)\n\n# Deep security dive\nreport = skill.execute_strategy(\n    strategy_name=\"deep_dive\",\n    target=\"target.com\",\n    depth=\"deep\",\n)\n\n# File hunting\nreport = skill.execute_strategy(\n    strategy_name=\"file_hunt\",\n    target=\"example.com\",\n    depth=\"deep\",\n)\n\n# Temporal trend analysis\nreport = skill.execute_strategy(\n    strategy_name=\"temporal\",\n    target=\"AI regulation news\",\n    depth=\"deep\",\n)\n\n\nBatch search — multiple queries at once\n\nqueries = [\n    \"python FastAPI vs Flask performance\",\n    \"rust web frameworks comparison 2024\",\n    \"go gin framework 
documentation\",\n]\n\nreports = skill.search_batch(queries, depth=\"quick\")\n\nfor report in reports:\n    print(f\"Query: {report.query}\")\n    print(f\"  Results: {len(report.results)}\")\n    print(f\"  Best: {report.top(1)[0].title if report.results else 'None'}\")\n    print()\n\n\nOverride engine and category selection\n\n# Force specific engines\nreport = skill.search(\n    \"quantum computing breakthroughs\",\n    engines=[\"google_scholar\", \"arxiv\", \"semantic_scholar\"],\n)\n\n# Force specific categories\nreport = skill.search(\n    \"kubernetes tutorial\",\n    categories=[\"it\", \"general\"],\n)\n\n# Force time range\nreport = skill.search(\n    \"zero-day vulnerabilities\",\n    time_range=\"week\",\n)\n\n# Force language\nreport = skill.search(\n    \"machine learning tutorials\",\n    language=\"en\",\n)\n\n\nWorking with the SearchReport object\n\nreport = skill.search(\"advanced persistent threats 2024\", depth=\"standard\")\n\n# LLM-ready text (for injecting into AI agent context)\ncontext = report.to_context(max_results=20)\n\n# Top N results sorted by relevance\ntop5 = report.top(5)\n\n# Full result list\nall_results = report.results\n\n# What was detected\nprint(f\"Intent: {report.intent.category.value}\")        # e.g. \"security\"\nprint(f\"Subcategory: {report.intent.subcategory}\")       # e.g. \"general\"\nprint(f\"Entities: {report.intent.entities}\")             # e.g. {\"year\": \"2024\"}\nprint(f\"Keywords: {report.intent.keywords}\")             # e.g. [\"advanced\", \"persistent\", \"threats\"]\nprint(f\"Confidence: {report.intent.confidence:.0%}\")     # e.g. \"80%\"\n\n# What strategy ran\nprint(f\"Strategy: {report.strategy.name}\")               # e.g. \"multi_angle\"\nprint(f\"Steps: {len(report.strategy.steps)}\")            # e.g. 
2\n\n# Performance metrics\nprint(f\"Total found: {report.total_found}\")              # before dedup\nprint(f\"Final results: {len(report.results)}\")           # after dedup+scoring\nprint(f\"Time: {report.timing_seconds:.2f}s\")\nprint(f\"Engines used: {report.engines_used}\")\n\n# Suggested refinements\nprint(f\"Suggestions: {report.suggestions}\")\n\n# Errors (if any)\nprint(f\"Errors: {report.errors}\")\n\n\nWorking with individual SearchResult objects\n\nfor r in report.top(10):\n    print(f\"Title:     {r.title}\")\n    print(f\"URL:       {r.url}\")\n    print(f\"Snippet:   {r.snippet[:300]}\")\n    print(f\"Relevance: {r.relevance:.2f} / 10.0\")\n    print(f\"Engines:   {r.engines}\")           # which SearXNG engines returned this\n    print(f\"Score:     {r.score}\")              # raw SearXNG score\n    print(f\"Category:  {r.category}\")           # SearXNG result category\n    print(f\"Positions: {r.positions}\")          # rank positions across engines\n    print(f\"Metadata:  {r.metadata}\")           # publishedDate, thumbnail, etc.\n    print()\n\nAI Agent Integration\n\nBasic tool handler\n\nfrom search_intelligence_skill import SearchSkill\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\n\ndef handle_search_tool(user_query: str) -> str:\n    \"\"\"Called by the AI agent when it needs to search the web.\"\"\"\n    report = skill.search(user_query, depth=\"standard\")\n    return report.to_context()\n\n\nWith depth control from agent\n\ndef handle_search_tool(user_query: str, depth: str = \"standard\") -> str:\n    report = skill.search(user_query, depth=depth)\n    return report.to_context()\n\n\nReturning structured data to agent\n\ndef handle_search_tool(user_query: str) -> dict:\n    report = skill.search(user_query, depth=\"standard\")\n    return {\n        \"query\": report.query,\n        \"intent\": f\"{report.intent.category.value}/{report.intent.subcategory}\",\n        \"confidence\": report.intent.confidence,\n        
\"result_count\": len(report.results),\n        \"results\": [\n            {\n                \"title\": r.title,\n                \"url\": r.url,\n                \"snippet\": r.snippet[:500],\n                \"relevance\": round(r.relevance, 2),\n                \"engines\": r.engines,\n            }\n            for r in report.top(10)\n        ],\n        \"suggestions\": report.suggestions,\n        \"engines_used\": report.engines_used,\n        \"time_seconds\": round(report.timing_seconds, 2),\n    }\n\n\nOpenAI function calling / tool definition\n\nsearch_tool_schema = {\n    \"type\": \"function\",\n    \"function\": {\n        \"name\": \"web_search\",\n        \"description\": (\n            \"Search the internet using advanced dork queries and multi-engine strategies. \"\n            \"Supports security scanning, OSINT, SEO analysis, academic research, \"\n            \"code search, file hunting, and general web search. \"\n            \"Describe what you want to find in natural language.\"\n        ),\n        \"parameters\": {\n            \"type\": \"object\",\n            \"properties\": {\n                \"query\": {\n                    \"type\": \"string\",\n                    \"description\": \"Natural language search query describing what to find\",\n                },\n                \"depth\": {\n                    \"type\": \"string\",\n                    \"enum\": [\"quick\", \"standard\", \"deep\", \"exhaustive\"],\n                    \"description\": \"Search thoroughness: quick (1-2 queries), standard (3-6), deep (6-12), exhaustive (12+)\",\n                    \"default\": \"standard\",\n                },\n            },\n            \"required\": [\"query\"],\n        },\n    },\n}\n\n\nLangChain tool wrapper\n\nfrom langchain.tools import Tool\nfrom search_intelligence_skill import SearchSkill\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\n\nsearch_tool = Tool(\n    name=\"web_search\",\n    description=(\n   
     \"Advanced web search with dork generation and multi-engine strategies. \"\n        \"Input a natural language query. Supports security, OSINT, SEO, academic, \"\n        \"code, file, and general searches.\"\n    ),\n    func=lambda q: skill.search(q, depth=\"standard\").to_context(),\n)\n\n\nContext manager for clean resource handling\n\nwith SearchSkill(searxng_url=\"http://localhost:8888\") as skill:\n    report = skill.search(\"find open redirects on example.com\")\n    print(report.to_context())\n# HTTP client is automatically closed\n\nUsing Individual Components Directly\n\nIntentParser — analyze queries without searching\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\nintent = parser.parse(\"find exposed .env files on example.com\")\n\nprint(f\"Category:    {intent.category.value}\")     # security\nprint(f\"Subcategory: {intent.subcategory}\")         # exposed_files\nprint(f\"Entities:    {intent.entities}\")            # {\"domain\": \"example.com\"}\nprint(f\"Keywords:    {intent.keywords}\")            # [\"exposed\", \"env\", \"files\"]\nprint(f\"Depth:       {intent.depth.value}\")         # standard\nprint(f\"Time range:  {intent.time_range}\")          # \"\"\nprint(f\"Confidence:  {intent.confidence:.0%}\")      # 95%\nprint(f\"Constraints: {intent.constraints}\")         # {}\n\n\nDorkGenerator — generate queries without searching\n\nfrom search_intelligence_skill import DorkGenerator, IntentParser\n\nparser = IntentParser()\ngen = DorkGenerator()\n\nintent = parser.parse(\"OSINT investigation on john@example.com\")\ndorks = gen.generate(intent)\n\nfor d in dorks:\n    print(f\"  [{', '.join(d.operators_used)}] {d.query}\")\n    print(f\"  Purpose: {d.purpose}\")\n\n# Build a custom dork manually\ncustom = gen.generate_custom(\n    keyword=\"secret\",\n    domain=\"example.com\",\n    filetype=\"env\",\n    intitle=\"config\",\n    exclude=[\"test\", \"demo\"],\n    exact_match=True,\n)\nprint(f\"Custom: 
{custom.query}\")\n\n# Translate a Google dork to Yandex syntax\nyandex_dork = gen.translate(custom, target_engine=\"yandex\")\nprint(f\"Yandex: {yandex_dork.query}\")\n\n# Translate to Bing\nbing_dork = gen.translate(custom, target_engine=\"bing\")\nprint(f\"Bing:   {bing_dork.query}\")\n\n\nResultAnalyzer — score and analyze results\n\nfrom search_intelligence_skill import ResultAnalyzer, IntentParser, SearXNGClient\n\nclient = SearXNGClient(base_url=\"http://localhost:8888\")\nparser = IntentParser()\nanalyzer = ResultAnalyzer()\n\nintent = parser.parse(\"python web frameworks comparison\")\n\nraw = client.search(\"python web frameworks comparison\", engines=[\"google\", \"bing\"])\nresults = client.parse_results(raw)\n\n# Full analysis pipeline: deduplicate → score → sort\nanalyzed = analyzer.analyze(results, intent)\n\nfor r in analyzed[:5]:\n    print(f\"[{r.relevance:.2f}] {r.title}\")\n\n# Generate refinement suggestions\nsuggestions = analyzer.generate_refinements(analyzed, intent)\nprint(f\"Suggestions: {suggestions}\")\n\n# Get a text summary\nsummary = analyzer.summarize(analyzed, intent)\nprint(summary)\n\nclient.close()\n\n\nSearXNGClient — direct API access\n\nfrom search_intelligence_skill import SearXNGClient\n\nclient = SearXNGClient(base_url=\"http://localhost:8888\")\n\n# Single search\nraw = client.search(\n    query='site:github.com \"fastapi\" filetype:py',\n    engines=[\"google\", \"bing\", \"duckduckgo\"],\n    categories=[\"general\"],\n    time_range=\"month\",\n    language=\"en\",\n    pageno=1,\n    safesearch=0,\n)\n\n# Parse results into SearchResult objects\nresults = client.parse_results(raw)\n\n# Get SearXNG suggestions\nsuggestions = client.get_suggestions(raw)\n\n# Get spelling corrections\ncorrections = client.get_corrections(raw)\n\n# See which engines failed\nunresponsive = client.get_unresponsive(raw)\n\n# Batch search\nresponses = client.search_batch(\n    queries=[\"query 1\", \"query 2\", \"query 3\"],\n    
engines=[\"google\"],\n)\n\n# Health check\nif client.health_check():\n    print(\"SearXNG is online\")\n\nclient.close()\n\nQuick Reference\n\nSearch Depths\n\nfrom search_intelligence_skill import Depth\n\nDepth.QUICK        # 1-2 queries, single step, fast lookups\nDepth.STANDARD     # 3-6 queries, multi-engine, general searching\nDepth.DEEP         # 6-12 queries, multi-step, thorough research\nDepth.EXHAUSTIVE   # 12+ queries, full sweep, complete investigations\n\n\nIntent Categories (auto-detected)\n\nfrom search_intelligence_skill import IntentCategory\n\nIntentCategory.GENERAL    # General web search\nIntentCategory.SECURITY   # Vulnerabilities, exposed files, pentesting\nIntentCategory.SEO        # Indexation, backlinks, competitors, technical SEO\nIntentCategory.OSINT      # People, emails, usernames, domains, companies\nIntentCategory.ACADEMIC   # Papers, datasets, authors, journals\nIntentCategory.CODE       # Repositories, packages, docs, bugs\nIntentCategory.FILES      # Documents, data files, archives, media\nIntentCategory.NEWS       # Breaking news, analysis, trends\nIntentCategory.IMAGES     # Image search\nIntentCategory.VIDEOS     # Video search\nIntentCategory.SOCIAL     # Reddit, forums, discussions\nIntentCategory.SHOPPING   # Products, prices, comparisons\nIntentCategory.LEGAL      # Law, regulations, patents\nIntentCategory.MEDICAL    # Health, diseases, clinical research\n\n\nSearch Strategies (auto-selected by depth + intent)\n\n# Strategies are selected automatically, but you can also invoke them directly:\nskill.execute_strategy(\"quick\", target=\"example.com\")           # 1 step, top engines\nskill.execute_strategy(\"broad_to_narrow\", target=\"example.com\") # Wide then focused\nskill.execute_strategy(\"multi_angle\", target=\"example.com\")     # Same topic, different formulations\nskill.execute_strategy(\"deep_dive\", target=\"example.com\")       # Exhaustive dork coverage\nskill.execute_strategy(\"osint_chain\", 
target=\"example.com\")     # Progressive recon\nskill.execute_strategy(\"verify\", target=\"some claim\")           # Cross-reference sources\nskill.execute_strategy(\"file_hunt\", target=\"example.com\")       # Targeted file search\nskill.execute_strategy(\"temporal\", target=\"AI news\")            # Across time periods\n\n\nSupported SearXNG Engines (90+)\n\n# General: google, bing, duckduckgo, brave, qwant, startpage, mojeek,\n#          yandex, yahoo, presearch, wiby, stract, yep, baidu, naver ...\n#\n# IT/Dev:  github, stackoverflow, gitlab, npm, pypi, dockerhub,\n#          arch_linux_wiki, crates_io, packagist, pkg_go_dev ...\n#\n# Science: arxiv, google_scholar, semantic_scholar, crossref, pubmed,\n#          base, openalex, core, wolfram_alpha ...\n#\n# News:    google_news, bing_news, yahoo_news, brave_news, wikinews ...\n#\n# Social:  reddit, lemmy, mastodon, hacker_news, lobsters ...\n#\n# Images:  google_images, bing_images, flickr, unsplash, openverse ...\n#\n# Videos:  youtube, google_videos, dailymotion, vimeo, piped, odysee ...\n#\n# Files:   piratebay, 1337x, annas_archive, z_library ...\n#\n# Music:   bandcamp, genius, soundcloud, youtube_music ...\n#\n# Maps:    openstreetmap, photon ...\n#\n# Wikis:   wikipedia, wikidata, wikimedia_commons ...\n\n\nDork Operators (auto-translated across engines)\n\n# Google operators:\n#   site:  filetype:  intitle:  allintitle:  inurl:  allinurl:\n#   intext:  allintext:  inanchor:  cache:  related:  info:  define:\n#   before:  after:  AROUND(N)  \"exact\"  -exclude  OR  *  N..M\n#\n# Bing operators:\n#   site:  filetype:  intitle:  inurl:  inbody:  contains:  ip:\n#   language:  loc:  prefer:  feed:  \"exact\"  -exclude  OR  NEAR:N\n#\n# DuckDuckGo operators:\n#   site:  filetype:  intitle:  inurl:  \"exact\"  -exclude  OR\n#\n# Yandex operators:\n#   site:  mime:  title:  inurl:  host:  domain:  lang:  date:\n#   \"exact\"  -exclude  |\n#\n# Brave operators:\n#   site:  filetype:  intitle:  inurl:  
\"exact\"  -exclude  OR\n#\n# The skill auto-translates between engines:\n#   filetype: → mime: (Yandex)\n#   intitle:  → title: (Yandex)\n#   intext:   → inbody: (Bing)\n\nDork Template Library\n\nSecurity dorks available (by subcategory)\n\nexposed_files      — .env, .log, .sql, .bak, .conf, .pem, .key, .json\ndirectory_listing  — \"index of\", \"directory listing\", \"parent directory\"\nadmin_panels       — /admin, /login, /dashboard, wp-admin, phpmyadmin, cpanel\nsensitive_data     — passwords, RSA keys, AWS keys, database URLs, SMTP creds\nexposed_apis       — /api/, swagger, api-docs, graphql, openapi\nsubdomains         — site:*.domain, external references, inurl:domain\ngit_exposed        — .git, .git/config, .svn, .hg\ntechnology_stack   — \"powered by\", wp-content, X-Powered-By\ngeneral            — CVE, exploit, PoC, security advisory\n\n\nOSINT dorks available (by subcategory)\n\nperson    — LinkedIn, Twitter/X, Facebook, Instagram, GitHub, Medium, resume, CV\nemail     — email mentions, cross-site, leaks, LinkedIn, GitHub\nusername  — GitHub, Reddit, Twitter, Instagram, YouTube, Keybase, StackOverflow\ndomain    — site:, subdomains, whois, Shodan, DNS, SSL, Censys, crt.sh\ncompany   — LinkedIn company, Crunchbase, Glassdoor, SEC filings, employees\nphone     — whitepages, truecaller, Facebook, name/address\nip        — Shodan, abuse/blacklist, open ports, whois\n\n\nSEO dorks available (by subcategory)\n\nindexation     — site:, sitemap, blog, tag/category pages\nbacklinks      — external mentions, anchor text, link:\ncompetitors    — related:, same-keyword competitors\ncontent_audit  — intitle/inurl/intext keyword matching\ntechnical_seo  — sitemap XML, robots.txt, noindex, canonical, hreflang, schema\n\n\nAcademic dorks available (by subcategory)\n\npapers   — arxiv, ResearchGate, academia.edu, DOI, .edu PDFs\ndatasets — CSV, JSON, Kaggle, HuggingFace, Zenodo\nauthors  — Google Scholar, ORCID, ResearchGate, publication lists\n\n\nCode dorks 
available (by subcategory)\n\nrepositories  — GitHub, GitLab, Bitbucket, Codeberg, Sourcehut\npackages      — npm, PyPI, crates.io, RubyGems, Packagist, pkg.go.dev\ndocumentation — ReadTheDocs, README, API references\nissues_bugs   — GitHub issues, StackOverflow errors\n\nAdvanced Usage\n\nCross-engine dork translation\n\nfrom search_intelligence_skill import DorkGenerator\n\ngen = DorkGenerator()\n\n# Build a Google dork\ndork = gen.generate_custom(\n    keyword=\"secret\",\n    domain=\"example.com\",\n    filetype=\"env\",\n    intitle=\"config\",\n)\nprint(f\"Google: {dork.query}\")\n# → site:example.com filetype:env intitle:\"config\" secret\n\n# Translate to Yandex (filetype → mime, intitle → title)\nyandex = gen.translate(dork, \"yandex\")\nprint(f\"Yandex: {yandex.query}\")\n# → site:example.com mime:env title:\"config\" secret\n\n# Translate to Bing\nbing = gen.translate(dork, \"bing\")\nprint(f\"Bing:   {bing.query}\")\n\n# Translate to DuckDuckGo (drops unsupported operators)\nddg = gen.translate(dork, \"duckduckgo\")\nprint(f\"DDG:    {ddg.query}\")\n\n# Translate to an engine without operator support (strips all operators)\nplain = gen.translate(dork, \"wikipedia\")\nprint(f\"Plain:  {plain.query}\")\n\n\nResult scoring details\n\n# Each result is scored on 7 signals (0-10 scale):\n#\n# 1. SearXNG base score (normalized)        — weight: 2.0\n# 2. Keyword match in title + snippet       — weight: 3.0\n# 3. Multi-engine agreement (appeared in N)  — weight: 0.5/engine, max 2.0\n# 4. Position rank (lower = better)          — weight: 1.5\n# 5. Source credibility (.gov +1.5, .edu +1.4, arxiv +1.4, etc.)\n# 6. Content quality (snippet length, HTTPS, URL sanity)\n# 7. Intent-specific boost (arxiv for academic, github for code, etc.)\n#\n# Credibility penalties: spam (-0.7), \"click here\" (-0.5), \"free download\" (-0.4)\n\n\nAuto-refinement behavior\n\n# When auto_refine=True (default) and results < 5:\n# 1. 
Analyzer generates refined queries (broader, different keywords)\n# 2. Skill executes up to 3 refinement queries\n# 3. New results are merged with originals\n# 4. Full dedup + re-scoring runs\n# 5. Process repeats up to max_refine_rounds\n\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    auto_refine=True,\n    max_refine_rounds=2,  # Try refining up to 2 times\n)\n\n# Disable auto-refinement for speed-critical paths\nskill_fast = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    auto_refine=False,\n)\n\n\nEntity extraction capabilities\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\n\n# Domains\nintent = parser.parse(\"scan example.com for vulnerabilities\")\n# entities: {\"domain\": \"example.com\"}\n\n# Emails\nintent = parser.parse(\"investigate user@company.com\")\n# entities: {\"email\": \"user@company.com\", \"email_domain\": \"company.com\"}\n\n# IPs\nintent = parser.parse(\"lookup 192.168.1.1\")\n# entities: {\"ip\": \"192.168.1.1\"}\n\n# CVEs\nintent = parser.parse(\"details on CVE-2024-3094\")\n# entities: {\"cve\": \"CVE-2024-3094\"}\n\n# Phone numbers\nintent = parser.parse(\"find owner of +1-555-123-4567\")\n# entities: {\"phone\": \"+1-555-123-4567\"}\n\n# Usernames\nintent = parser.parse(\"find accounts for @johndoe42\")\n# entities: {\"username\": \"johndoe42\"}\n\n# Names (quoted)\nintent = parser.parse('investigate \"John Smith\"')\n# entities: {\"name\": \"John Smith\"}\n\n# Names (capitalized pattern)\nintent = parser.parse(\"find information about Jane Doe\")\n# entities: {\"name\": \"Jane Doe\"}\n\n# File types\nintent = parser.parse(\"find documents filetype:pdf\")\n# entities: {\"filetype\": \"pdf\"}\n\n# Years\nintent = parser.parse(\"research papers from 2024\")\n# entities: {\"year\": \"2024\"}\n\n# Multiple entities combined\nintent = parser.parse('CVE-2024-3094 on example.com \"John Doe\"')\n# entities: {\"cve\": \"CVE-2024-3094\", \"domain\": \"example.com\", \"name\": 
\"John Doe\"}\n\n\nTime range detection\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\n\nparser.parse(\"news today\").time_range                  # \"day\"\nparser.parse(\"what happened this week\").time_range     # \"week\"\nparser.parse(\"articles from last month\").time_range    # \"month\"\nparser.parse(\"publications this year\").time_range      # \"year\"\nparser.parse(\"latest updates on AI\").time_range        # \"month\" (heuristic)\nparser.parse(\"history of computing\").time_range        # \"\" (no time constraint)\n\n\nConstraint extraction\n\nfrom search_intelligence_skill import IntentParser\n\nparser = IntentParser()\n\n# Language constraints\nintent = parser.parse(\"machine learning tutorials in spanish\")\n# constraints: {\"language\": \"es\"}\n\n# Exhaustive hints\nintent = parser.parse(\"find everything about this vulnerability\")\n# constraints: {\"exhaustive\": True}\n\n# Result limits\nintent = parser.parse(\"top 20 python frameworks\")\n# constraints: {\"limit\": 20}\n\n# Exclusion hints\nintent = parser.parse(\"web frameworks except Django without Flask\")\n# constraints: {\"exclude\": [\"django\", \"flask\"]}\n\n\nPagination\n\nfrom search_intelligence_skill import SearXNGClient\n\nclient = SearXNGClient(base_url=\"http://localhost:8888\")\n\n# Fetch multiple pages\nall_results = []\nfor page in range(1, 4):\n    raw = client.search(\"python frameworks\", pageno=page)\n    results = client.parse_results(raw)\n    all_results.extend(results)\n    if not results:\n        break\n\nprint(f\"Total across 3 pages: {len(all_results)}\")\nclient.close()\n\n\nRate limiting and retries\n\n# Built-in rate limiting between requests\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    rate_limit=1.0,    # 1 second minimum between requests\n    max_retries=3,     # Retry failed requests up to 3 times\n    timeout=30.0,      # 30 second timeout per request\n)\n\n# Rate limiting is automatic — no manual 
sleep() needed\n# Retries use increasing delays on 429 (Too Many Requests)\n\n\nLogging for debugging\n\nimport logging\n\n# See everything the skill does\nlogging.basicConfig(level=logging.DEBUG)\n\n# Or just info-level\nlogging.basicConfig(level=logging.INFO)\n\nskill = SearchSkill(searxng_url=\"http://localhost:8888\")\nreport = skill.search(\"test query\", depth=\"standard\")\n\n# Logs will show:\n# INFO — Intent: security/exposed_files (confidence=0.95) — entities: {\"domain\": \"...\"}\n# INFO — Strategy: multi_angle — 2 steps\n# DEBUG — Executing step 1: Search angle 1\n# DEBUG — Search 'site:... filetype:env' returned 12 results\n# DEBUG — Executing step 2: Search angle 2\n# INFO — Search complete: 23 results, 4.21s, 4 engines\n\nAPI Methods\nMethod\tPurpose\tReturns\nskill.search(query, depth, ...)\tFull intelligent search pipeline\tSearchReport\nskill.search_dork(dork, ...)\tExecute raw dork query directly\tSearchReport\nskill.suggest_queries(query)\tPreview dorks without executing\tlist[DorkQuery]\nskill.build_dork(keyword, ...)\tBuild custom dork from parameters\tDorkQuery\nskill.execute_strategy(name, target)\tRun named strategy against target\tSearchReport\nskill.search_batch(queries, ...)\tExecute multiple searches\tlist[SearchReport]\nskill.health_check()\tCheck SearXNG connectivity\tbool\nskill.close()\tClose HTTP client\tNone\nSearchReport Properties\nProperty\tType\tDescription\n.query\tstr\tOriginal natural language query\n.intent\tSearchIntent\tParsed intent with category, entities, keywords\n.strategy\tSearchStrategy\tStrategy that was used (name, steps)\n.results\tlist[SearchResult]\tScored and deduplicated results\n.total_found\tint\tTotal results before deduplication\n.suggestions\tlist[str]\tRefinement suggestions\n.refined_queries\tlist[str]\tAuto-refinement queries used\n.errors\tlist[str]\tErrors encountered during search\n.timing_seconds\tfloat\tTotal wall-clock time\n.engines_used\tlist[str]\tEngines that returned 
results\n.to_context(max_results)\tstr\tLLM-formatted text output\n.top(n)\tlist[SearchResult]\tTop N by relevance score\nSearchResult Properties\nProperty\tType\tDescription\n.title\tstr\tResult title\n.url\tstr\tResult URL\n.snippet\tstr\tContent snippet / description\n.engines\tlist[str]\tWhich SearXNG engines returned it\n.score\tfloat\tRaw SearXNG score\n.relevance\tfloat\tComputed multi-signal relevance (0-10)\n.category\tstr\tSearXNG result category\n.positions\tlist[int]\tRank positions across engines\n.metadata\tdict\tExtra fields: publishedDate, thumbnail, img_src\nTroubleshooting\n\nSearXNG not reachable\n\n# Check the instance is running\ncurl http://localhost:8888/healthz\n\n# Check JSON API is enabled\ncurl \"http://localhost:8888/search?q=test&format=json\"\n\n# Common fixes:\n# 1. Ensure port mapping is correct (docker: -p 8888:8080)\n# 2. Ensure search.formats includes \"json\" in settings.yml\n# 3. Check firewall rules\n\nif not skill.health_check():\n    print(\"SearXNG unreachable — check URL, port, and settings\")\n\n\nNo results returned\n\nreport = skill.search(\"very specific obscure query\")\n\nif not report.results:\n    print(\"No results. Try:\")\n    print(\"  1. Broader keywords\")\n    print(\"  2. Different depth: depth='deep'\")\n    print(\"  3. Check suggestions:\", report.suggestions)\n    print(\"  4. Check errors:\", report.errors)\n    print(\"  5. 
Try different engines:\", report.engines_used)\n\n    # Manual broader search\n    report2 = skill.search(\"broader version of query\", depth=\"deep\")\n\n\nTimeout errors\n\n# Increase timeout for complex queries\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    timeout=60.0,      # 60 seconds\n    max_retries=3,     # More retries\n)\n\n\nRate limiting (429 errors)\n\n# Increase delay between requests\nskill = SearchSkill(\n    searxng_url=\"http://localhost:8888\",\n    rate_limit=2.0,    # 2 seconds between requests\n)\n\n\nSSL errors (local development only)\n\nskill = SearchSkill(\n    searxng_url=\"https://localhost:8888\",\n    verify_ssl=False,  # ONLY for local dev — never in production\n)\n\n\nWrong intent detected\n\n# If the auto-detection picks the wrong category, use direct dork:\nreport = skill.search_dork(\n    'site:example.com filetype:pdf \"annual report\"',\n    engines=[\"google\", \"bing\"],\n)\n\n# Or force engines/categories:\nreport = skill.search(\n    \"some ambiguous query\",\n    engines=[\"google_scholar\", \"arxiv\"],\n    categories=[\"science\"],\n)\n\n\nMemory usage with large result sets\n\n# Limit results to control memory\nreport = skill.search(\"broad query\", depth=\"exhaustive\", max_results=50)\n\n# Process results in a streaming fashion\nfor r in report.results:\n    process(r)  # handle one at a time\n\nHow It All Works Together\nUser Query\n    │\n    ▼\n┌─────────────────┐\n│  IntentParser    │──→ category, subcategory, entities, keywords\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│  DorkGenerator   │──→ 5-20 optimized dork queries with operators\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│ StrategyPlanner  │──→ multi-step plan (which dorks, which engines, what order)\n└────────┬────────┘\n         │\n         ▼\n┌─────────────────┐\n│  SearXNGClient   │──→ executes queries against your instance (retries, rate limit)\n└────────┬────────┘\n         │\n  
       ▼\n┌─────────────────┐\n│ ResultAnalyzer   │──→ dedup, score, rank, credibility check\n└────────┬────────┘\n         │\n    (if results poor)\n         │\n         ▼\n┌─────────────────┐\n│  Auto-Refine     │──→ generate new queries, re-search, re-analyze\n└────────┬────────┘\n         │\n         ▼\n   SearchReport\n  .to_context() → LLM-ready text\n  .top(n) → best results\n  .results → full list\n\nNotes\n\nPrivacy\n\nAll searches route through YOUR SearXNG instance\nZero API keys required for any engine\nNo data sent to third-party services (except through SearXNG's engine requests)\nSearXNG strips tracking parameters and anonymizes requests\n\nPerformance tips\n\nReuse the SearchSkill instance across searches (connection pooling)\nUse depth=\"quick\" for simple lookups, reserve \"deep\" / \"exhaustive\" for research\nSet auto_refine=False for speed-critical paths\nUse skill.suggest_queries() to preview before executing expensive searches\nBatch independent queries with skill.search_batch()\n\nAccuracy tips\n\nInclude specific entities in your query (domains, emails, CVEs, names)\nUse quoted phrases for exact matching: 'find \"exact phrase\"'\nSpecify time ranges when freshness matters: \"latest news this week\"\nUse depth=\"deep\" or \"exhaustive\" for comprehensive coverage\nCheck report.suggestions for refinement ideas\nCheck report.intent to verify the skill understood your query correctly\n\nExtending the skill\n\nAdd new dork templates in config.py → DORK_TEMPLATES\nAdd new intent signals in config.py → INTENT_SIGNALS\nAdd new engines in config.py → ENGINE_CATEGORIES\nAdd new operator translations in config.py → OPERATOR_SUPPORT\nAdd new strategies in config.py → STRATEGY_DEFINITIONS\nAdd new subcategory detection in intent.py → SUBCATEGORY_PATTERNS\n\nConfirm before sensitive operations\n\nSecurity scanning dorks may trigger alerts on target domains\nOSINT queries may involve personal information — use responsibly\nAlways validate that the target 
domain/entity is authorized for testing\nThis tool is for legitimate research, authorized security testing, and SEO analysis"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/elmaslouhymouaad/search-intelligence-skill",
    "publisherUrl": "https://clawhub.ai/elmaslouhymouaad/search-intelligence-skill",
    "owner": "elmaslouhymouaad",
    "version": "0.1.1",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/search-intelligence-skill",
    "downloadUrl": "https://openagent3.xyz/downloads/search-intelligence-skill",
    "agentUrl": "https://openagent3.xyz/skills/search-intelligence-skill/agent",
    "manifestUrl": "https://openagent3.xyz/skills/search-intelligence-skill/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/search-intelligence-skill/agent.md"
  }
}