{
  "schemaVersion": "1.0",
  "item": {
    "slug": "sogni-gen",
    "name": "Sogni Gen",
    "source": "tencent",
    "type": "skill",
    "category": "AI 智能",
    "sourceUrl": "https://clawhub.ai/krunkosaurus/sogni-gen",
    "canonicalUrl": "https://clawhub.ai/krunkosaurus/sogni-gen",
    "targetPlatform": "OpenClaw"
  },
  "install": {
    "downloadMode": "redirect",
    "downloadUrl": "/downloads/sogni-gen",
    "sourceDownloadUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=sogni-gen",
    "sourcePlatform": "tencent",
    "targetPlatform": "OpenClaw",
    "installMethod": "Manual import",
    "extraction": "Extract archive",
    "prerequisites": [
      "OpenClaw"
    ],
    "packageFormat": "ZIP package",
    "includedAssets": [
      "LICENSE",
      "README.md",
      "SKILL.md",
      "llm.txt",
      "skill-package.json",
      "version.mjs"
    ],
    "primaryDoc": "SKILL.md",
    "quickSetup": [
      "Download the package from Yavira.",
      "Extract the archive and review SKILL.md first.",
      "Import or place the package into your OpenClaw setup."
    ],
    "agentAssist": {
      "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
      "steps": [
        "Download the package from Yavira.",
        "Extract it into a folder your agent can access.",
        "Paste one of the prompts below and point your agent at the extracted folder."
      ],
      "prompts": [
        {
          "label": "New install",
          "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
        },
        {
          "label": "Upgrade existing",
          "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
        }
      ]
    },
    "sourceHealth": {
      "source": "tencent",
      "status": "healthy",
      "reason": "direct_download_ok",
      "recommendedAction": "download",
      "checkedAt": "2026-04-30T16:55:25.780Z",
      "expiresAt": "2026-05-07T16:55:25.780Z",
      "httpStatus": 200,
      "finalUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=sogni-gen",
      "contentType": "application/zip",
      "probeMethod": "head",
      "details": {
        "probeUrl": "https://wry-manatee-359.convex.site/api/v1/download?slug=sogni-gen",
        "contentDisposition": "attachment; filename=\"sogni-gen-1.0.0.zip\"",
        "redirectLocation": null,
        "bodySnippet": null
      },
      "scope": "source",
      "summary": "Source download looks usable.",
      "detail": "Yavira can redirect you to the upstream package for this source.",
      "primaryActionLabel": "Download for OpenClaw",
      "primaryActionHref": "/downloads/sogni-gen"
    },
    "validation": {
      "installChecklist": [
        "Use the Yavira download entry.",
        "Review SKILL.md after the package is downloaded.",
        "Confirm the extracted package contains the expected setup assets."
      ],
      "postInstallChecks": [
        "Confirm the extracted package includes the expected docs or setup files.",
        "Validate the skill or prompts are available in your target agent workspace.",
        "Capture any manual follow-up steps the agent could not complete."
      ]
    },
    "downloadPageUrl": "https://openagent3.xyz/downloads/sogni-gen",
    "agentPageUrl": "https://openagent3.xyz/skills/sogni-gen/agent",
    "manifestUrl": "https://openagent3.xyz/skills/sogni-gen/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/sogni-gen/agent.md"
  },
  "agentAssist": {
    "summary": "Hand the extracted package to your coding agent with a concrete install brief instead of figuring it out manually.",
    "steps": [
      "Download the package from Yavira.",
      "Extract it into a folder your agent can access.",
      "Paste one of the prompts below and point your agent at the extracted folder."
    ],
    "prompts": [
      {
        "label": "New install",
        "body": "I downloaded a skill package from Yavira. Read SKILL.md from the extracted folder and install it by following the included instructions. Then review README.md for any prerequisites, environment setup, or post-install checks. Tell me what you changed and call out any manual steps you could not complete."
      },
      {
        "label": "Upgrade existing",
        "body": "I downloaded an updated skill package from Yavira. Read SKILL.md from the extracted folder, compare it with my current installation, and upgrade it while preserving any custom configuration unless the package docs explicitly say otherwise. Then review README.md for any prerequisites, environment setup, or post-install checks. Summarize what changed and any follow-up checks I should run."
      }
    ]
  },
  "documentation": {
    "source": "clawhub",
    "primaryDoc": "SKILL.md",
    "sections": [
      {
        "title": "Sogni Image & Video Generation",
        "body": "Generate images and videos using Sogni AI's decentralized GPU network."
      },
      {
        "title": "Setup",
        "body": "Get Sogni credentials at https://app.sogni.ai/\nCreate credentials file:\n\nmkdir -p ~/.config/sogni\ncat > ~/.config/sogni/credentials << 'EOF'\nSOGNI_API_KEY=your_api_key\n# or:\n# SOGNI_USERNAME=your_username\n# SOGNI_PASSWORD=your_password\nEOF\nchmod 600 ~/.config/sogni/credentials\n\nYou can also export SOGNI_API_KEY, or SOGNI_USERNAME + SOGNI_PASSWORD, instead of writing the file.\n\nInstall dependencies (if cloned):\n\ncd /path/to/sogni-gen\nnpm i\n\nOr install from npm (no git clone):\n\nmkdir -p ~/.clawdbot/skills\ncd ~/.clawdbot/skills\nnpm i sogni-gen\nln -sfn node_modules/sogni-gen sogni-gen\n\nWhen this skill is distributed via ClawHub, it bootstraps its local runtime dependencies from skill-package.json during install. That avoids relying on a root package.json being present in the published skill artifact."
      },
      {
        "title": "Filesystem Paths and Overrides",
        "body": "Default file paths used by this skill:\n\nCredentials file (read): ~/.config/sogni/credentials\nLast render metadata (read/write): ~/.config/sogni/last-render.json\nOpenClaw config (read): ~/.openclaw/openclaw.json\nMedia listing for --list-media (read): ~/.clawdbot/media/inbound\nMCP local result copies (write): ~/Downloads/sogni\n\nPath override environment variables:\n\nSOGNI_CREDENTIALS_PATH\nSOGNI_LAST_RENDER_PATH\nSOGNI_MEDIA_INBOUND_DIR\nOPENCLAW_CONFIG_PATH\nSOGNI_DOWNLOADS_DIR (MCP)\nSOGNI_MCP_SAVE_DOWNLOADS=0 to disable MCP local file writes\nSOGNI_ALLOWED_DOWNLOAD_HOSTS to override which HTTPS hosts the MCP server may auto-download and save locally"
      },
      {
        "title": "Usage (Images & Video)",
        "body": "# Generate and get URL\nnode sogni-gen.mjs \"a cat wearing a hat\"\n\n# Save to file\nnode sogni-gen.mjs -o /tmp/cat.png \"a cat wearing a hat\"\n\n# JSON output (for scripting)\nnode sogni-gen.mjs --json \"a cat wearing a hat\"\n\n# Check token balances (no prompt required)\nnode sogni-gen.mjs --balance\n\n# Check token balances in JSON\nnode sogni-gen.mjs --json --balance\n\n# Quiet mode (suppress progress)\nnode sogni-gen.mjs -q -o /tmp/cat.png \"a cat wearing a hat\""
      },
      {
        "title": "Options",
        "body": "FlagDescriptionDefault-o, --output <path>Save to fileprints URL-m, --model <id>Model IDz_image_turbo_bf16-w, --width <px>Width512-h, --height <px>Height512-n, --count <num>Number of images1-t, --timeout <sec>Timeout seconds30 (300 for video)-s, --seed <num>Specific seedrandom--last-seedReuse seed from last render---seed-strategy <s>Seed strategy: random|prompt-hashprompt-hash--multi-angleMultiple angles LoRA mode (Qwen Image Edit)---angles-360Generate 8 azimuths (front -> front-left)---angles-360-videoAssemble looping 360 mp4 using i2v between angles (requires ffmpeg)---azimuth <key>front|front-right|right|back-right|back|back-left|left|front-leftfront--elevation <key>low-angle|eye-level|elevated|high-angleeye-level--distance <key>close-up|medium|widemedium--angle-strength <n>LoRA strength for multiple_angles0.9--angle-description <text>Optional subject description---steps <num>Override steps (model-dependent)---guidance <num>Override guidance (model-dependent)---output-format <f>Image output format: png|jpgpng--sampler <name>Sampler (model-dependent)---scheduler <name>Scheduler (model-dependent)---lora <id>LoRA id (repeatable, edit only)---loras <ids>Comma-separated LoRA ids---lora-strength <n>LoRA strength (repeatable)---lora-strengths <n>Comma-separated LoRA strengths---token-type <type>Token type: spark|sognispark--balance, --balancesShow SPARK/SOGNI balances and exit--c, --context <path>Context image for editing---last-imageUse last generated image as context/ref---video, -vGenerate video instead of image---workflow <type>Video workflow (t2v|i2v|s2v|ia2v|a2v|v2v|animate-move|animate-replace)inferred--fps <num>Frames per second (video)16--duration <sec>Duration in seconds (video)5--frames <num>Override total frames (video)---auto-resize-assetsAuto-resize video assetstrue--no-auto-resize-assetsDisable auto-resize---estimate-video-costEstimate video cost and exit (requires --steps)---photoboothFace transfer mode (InstantID + SDXL Turbo)---cn-strength <n>ControlNet strength (photobooth)0.8--cn-guidance-end <n>ControlNet guidance end point (photobooth)0.3--ref <path>Reference image for video or photobooth facerequired for video/photobooth--ref-end <path>End frame for i2v interpolation---ref-audio <path>Reference audio for s2v---ref-video <path>Reference video for animate/v2v workflows---controlnet-name <name>ControlNet type for v2v: canny|pose|depth|detailer---controlnet-strength <n>ControlNet strength for v2v (0.0-1.0)0.8--sam2-coordinates <coords>SAM2 click coords for animate-replace (x,y or x1,y1;x2,y2)---trim-end-frameTrim last frame for seamless video stitching---first-frame-strength <n>Keyframe strength for start frame (0.0-1.0)---last-frame-strength <n>Keyframe strength for end frame (0.0-1.0)---lastShow last render info---jsonJSON outputfalse--strict-sizeDo not auto-adjust i2v video size for reference resizing constraintsfalse-q, --quietNo progress outputfalse--extract-last-frame <video> <image>Extract last frame from video (safe ffmpeg wrapper)---concat-videos <out> <clips...>Concatenate video clips (safe ffmpeg wrapper)---list-media [type]List recent inbound media (images|audio|all)images"
      },
      {
        "title": "OpenClaw Config Defaults",
        "body": "When installed as an OpenClaw plugin, sogni-gen will read defaults from:\n\n~/.openclaw/openclaw.json\n\n{\n  \"plugins\": {\n    \"entries\": {\n      \"sogni-gen\": {\n        \"enabled\": true,\n        \"config\": {\n          \"defaultImageModel\": \"z_image_turbo_bf16\",\n          \"defaultEditModel\": \"qwen_image_edit_2511_fp8_lightning\",\n          \"defaultPhotoboothModel\": \"coreml-sogniXLturbo_alpha1_ad\",\n          \"videoModels\": {\n            \"t2v\": \"wan_v2.2-14b-fp8_t2v_lightx2v\",\n            \"i2v\": \"wan_v2.2-14b-fp8_i2v_lightx2v\",\n            \"s2v\": \"wan_v2.2-14b-fp8_s2v_lightx2v\",\n            \"ia2v\": \"ltx2-19b-fp8_ia2v_distilled\",\n            \"a2v\": \"ltx2-19b-fp8_a2v_distilled\",\n            \"animate-move\": \"wan_v2.2-14b-fp8_animate-move_lightx2v\",\n            \"animate-replace\": \"wan_v2.2-14b-fp8_animate-replace_lightx2v\",\n            \"v2v\": \"ltx2-19b-fp8_v2v_distilled\"\n          },\n          \"defaultVideoWorkflow\": \"t2v\",\n          \"defaultNetwork\": \"fast\",\n          \"defaultTokenType\": \"spark\",\n          \"seedStrategy\": \"prompt-hash\",\n          \"modelDefaults\": {\n            \"flux1-schnell-fp8\": { \"steps\": 4, \"guidance\": 3.5 },\n            \"flux2_dev_fp8\": { \"steps\": 20, \"guidance\": 7.5 }\n          },\n          \"defaultWidth\": 768,\n          \"defaultHeight\": 768,\n          \"defaultCount\": 1,\n          \"defaultFps\": 16,\n          \"defaultDurationSec\": 5,\n          \"defaultImageTimeoutSec\": 30,\n          \"defaultVideoTimeoutSec\": 300,\n          \"credentialsPath\": \"~/.config/sogni/credentials\",\n          \"lastRenderPath\": \"~/.config/sogni/last-render.json\",\n          \"mediaInboundDir\": \"~/.clawdbot/media/inbound\"\n        }\n      }\n    }\n  }\n}\n\nCLI flags always override these defaults.\nIf your OpenClaw config lives elsewhere, set OPENCLAW_CONFIG_PATH.\nSeed strategies: prompt-hash (deterministic) or random."
      },
      {
        "title": "Image Models",
        "body": "ModelSpeedUse Casez_image_turbo_bf16Fast (~5-10s)General purpose, defaultflux1-schnell-fp8Very fastQuick iterationsflux2_dev_fp8Slow (~2min)High qualitychroma-v.46-flash_fp8MediumBalancedqwen_image_edit_2511_fp8MediumImage editing with context (up to 3)qwen_image_edit_2511_fp8_lightningFastQuick image editingcoreml-sogniXLturbo_alpha1_adFastPhotobooth face transfer (SDXL Turbo)"
      },
      {
        "title": "WAN 2.2 Models",
        "body": "ModelSpeedUse Casewan_v2.2-14b-fp8_i2v_lightx2vFastDefault video generationwan_v2.2-14b-fp8_i2vSlowHigher quality videowan_v2.2-14b-fp8_t2v_lightx2vFastText-to-videowan_v2.2-14b-fp8_s2v_lightx2vFastSound-to-videowan_v2.2-14b-fp8_animate-move_lightx2vFastAnimate-movewan_v2.2-14b-fp8_animate-replace_lightx2vFastAnimate-replace"
      },
      {
        "title": "LTX-2 / LTX-2.3 Models",
        "body": "ModelSpeedUse Caseltx2-19b-fp8_t2v_distilledFast (~2-3min)Text-to-video, 8-stepltx2-19b-fp8_t2vMedium (~5min)Text-to-video, 20-step qualityltx2-19b-fp8_i2v_distilledFast (~2-3min)Image-to-video, 8-stepltx2-19b-fp8_i2vMedium (~5min)Image-to-video, 20-step qualityltx2-19b-fp8_ia2v_distilledFast (~2-3min)Image+audio-to-videoltx2-19b-fp8_a2v_distilledFast (~2-3min)Audio-to-videoltx2-19b-fp8_v2v_distilledFast (~3min)Video-to-video with ControlNetltx2-19b-fp8_v2vMedium (~5min)Video-to-video with ControlNet, qualityltx23-22b-fp8_t2v_distilledFast (~2-3min)Text-to-video, LTX-2.3"
      },
      {
        "title": "Image Editing with Context",
        "body": "Edit images using reference images (Qwen models support up to 3):\n\n# Single context image\nnode sogni-gen.mjs -c photo.jpg \"make the background a beach\"\n\n# Multiple context images (subject + style)\nnode sogni-gen.mjs -c subject.jpg -c style.jpg \"apply the style to the subject\"\n\n# Use last generated image as context\nnode sogni-gen.mjs --last-image \"make it more vibrant\"\n\nWhen context images are provided without -m, defaults to qwen_image_edit_2511_fp8_lightning."
      },
      {
        "title": "Photobooth (Face Transfer)",
        "body": "Generate stylized portraits from a face photo using InstantID ControlNet. When a user mentions \"photobooth\", wants a stylized portrait of themselves, or asks to transfer their face into a style, use --photobooth with --ref pointing to their face image.\n\n# Basic photobooth\nnode sogni-gen.mjs --photobooth --ref face.jpg \"80s fashion portrait\"\n\n# Multiple outputs\nnode sogni-gen.mjs --photobooth --ref face.jpg -n 4 \"LinkedIn professional headshot\"\n\n# Custom ControlNet tuning\nnode sogni-gen.mjs --photobooth --ref face.jpg --cn-strength 0.6 --cn-guidance-end 0.5 \"oil painting\"\n\nUses SDXL Turbo (coreml-sogniXLturbo_alpha1_ad) at 1024x1024 by default. The face image is passed via --ref and styled according to the prompt. Cannot be combined with --video or -c/--context.\n\nAgent usage:\n\n# Photobooth: stylize a face photo\nnode {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -o /tmp/stylized.png \"80s fashion portrait\"\n\n# Multiple photobooth outputs\nnode {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -n 4 -o /tmp/stylized.png \"LinkedIn professional headshot\""
      },
      {
        "title": "Multiple Angles (Turnaround)",
        "body": "Generate specific camera angles from a single reference image using the Multiple Angles LoRA:\n\n# Single angle\nnode sogni-gen.mjs --multi-angle -c subject.jpg \\\n  --azimuth front-right --elevation eye-level --distance medium \\\n  --angle-strength 0.9 \\\n  \"studio portrait, same person\"\n\n# 360 sweep (8 azimuths)\nnode sogni-gen.mjs --angles-360 -c subject.jpg --distance medium --elevation eye-level \\\n  \"studio portrait, same person\"\n\n# 360 sweep video (looping mp4, uses i2v between angles; requires ffmpeg)\nnode sogni-gen.mjs --angles-360 --angles-360-video /tmp/turntable.mp4 \\\n  -c subject.jpg --distance medium --elevation eye-level \\\n  \"studio portrait, same person\"\n\nThe prompt is auto-built with the required <sks> token plus the selected camera angle keywords.\n--angles-360-video generates i2v clips between consecutive angles (including last→first) and concatenates them with ffmpeg for a seamless loop."
      },
      {
        "title": "360 Video Best Practices",
        "body": "When a user requests a \"360 video\", follow this workflow:\n\nDefault camera parameters (do not ask unless they specify):\n\nElevation: default to eye-level\nDistance: default to medium\n\n\n\nMap user terms to flags:\nUser saysFlag value\"high\" angle--elevation high-angle\"medium\" angle--elevation eye-level\"low\" angle--elevation low-angle\"close\"--distance close-up\"medium\" distance--distance medium\"far\"--distance wide\n\n\nAlways use first-frame/last-frame stitching - the --angles-360-video flag automatically handles this by generating i2v clips between consecutive angles including last→first for seamless looping.\n\n\nExample command:\nnode sogni-gen.mjs --angles-360 --angles-360-video /tmp/output.mp4 \\\n  -c /path/to/image.png --elevation eye-level --distance medium \\\n  \"description of subject\""
      },
      {
        "title": "Transition Video Rule",
        "body": "For any transition video work, always use the Sogni skill/plugin (not raw ffmpeg or other shell commands). Use the built-in --extract-last-frame and --concat-videos flags for video manipulation."
      },
      {
        "title": "Insufficient Funds Handling",
        "body": "When you see \"Debit Error: Insufficient funds\", reply:\n\n\"Insufficient funds. Claim 50 free daily Spark points at https://app.sogni.ai/\""
      },
      {
        "title": "Video Generation",
        "body": "Generate videos from a reference image:\n\n# Text-to-video (t2v)\nnode sogni-gen.mjs --video \"ocean waves at sunset\"\n\n# Basic video from image\nnode sogni-gen.mjs --video --ref cat.jpg -o cat.mp4 \"cat walks around\"\n\n# Use last generated image as reference\nnode sogni-gen.mjs --last-image --video \"gentle camera pan\"\n\n# Custom duration and FPS\nnode sogni-gen.mjs --video --ref scene.png --duration 10 --fps 24 \"zoom out slowly\"\n\n# Sound-to-video (s2v)\nnode sogni-gen.mjs --video --ref face.jpg --ref-audio speech.m4a \\\n  -m wan_v2.2-14b-fp8_s2v_lightx2v \"lip sync talking head\"\n\n# Image+audio-to-video (ia2v, LTX)\nnode sogni-gen.mjs --video --workflow ia2v --ref cover.jpg --ref-audio song.mp3 \\\n  \"music video with synchronized motion\"\n\n# Audio-to-video (a2v, LTX)\nnode sogni-gen.mjs --video --workflow a2v --ref-audio song.mp3 \\\n  \"abstract audio-reactive visualizer\"\n\n# LTX-2.3 text-to-video\nnode sogni-gen.mjs --video -m ltx23-22b-fp8_t2v_distilled --duration 20 \\\n  \"A wide cinematic aerial shot opens over steep tropical cliffs at golden hour, warm sunlight grazing the rock faces while sea mist drifts above the water below. Palm trees bend gently along the ridge as waves roll against the shoreline, leaving bright bands of foam across the dark stone. The camera glides forward in one continuous pass, revealing more of the coastline as sunlight flickers across wet surfaces and distant birds wheel through the haze. The scene holds a calm, upscale travel-film mood with smooth stabilized motion and crisp environmental detail.\"\n\n# Animate (motion transfer)\nnode sogni-gen.mjs --video --ref subject.jpg --ref-video motion.mp4 \\\n  --workflow animate-move \"transfer motion\""
      },
      {
        "title": "Video-to-Video (V2V) with ControlNet",
        "body": "Transform an existing video using LTX-2 models with ControlNet guidance:\n\n# Basic v2v with canny edge detection\nnode sogni-gen.mjs --video --workflow v2v --ref-video input.mp4 \\\n  --controlnet-name canny \"stylized anime version\"\n\n# V2V with pose detection and custom strength\nnode sogni-gen.mjs --video --workflow v2v --ref-video dance.mp4 \\\n  --controlnet-name pose --controlnet-strength 0.7 \"robot dancing\"\n\n# V2V with depth map\nnode sogni-gen.mjs --video --workflow v2v --ref-video scene.mp4 \\\n  --controlnet-name depth \"watercolor painting style\"\n\nControlNet types: canny (edge detection), pose (body pose), depth (depth map), detailer (detail enhancement)."
      },
      {
        "title": "Photo Restoration",
        "body": "Restore damaged vintage photos using Qwen image editing:\n\n# Basic restoration\nsogni-gen -c damaged_photo.jpg -o restored.png \\\n  \"professionally restore this vintage photograph, remove damage and scratches\"\n\n# Detailed restoration with preservation hints\nsogni-gen -c old_photo.jpg -o restored.png -w 1024 -h 1280 \\\n  \"restore this vintage photo, remove peeling, tears and wear marks, \\\n  preserve natural features and expression, maintain warm nostalgic color tones\"\n\nTips for good restorations:\n\nDescribe the damage: \"peeling\", \"scratches\", \"tears\", \"fading\"\nSpecify what to preserve: \"natural features\", \"eye color\", \"hair\", \"expression\"\nMention the era for color tones: \"1970s warm tones\", \"vintage sepia\"\n\nFinding received images (Telegram/etc):\n\nnode {{skillDir}}/sogni-gen.mjs --json --list-media images\n\nDo NOT use ls, cp, or other shell commands to browse user files. Always use --list-media to find inbound media."
      },
      {
        "title": "IMPORTANT KEYWORD RULE",
        "body": "If the user message includes the word \"photobooth\" (case-insensitive), always use --photobooth mode with --ref set to the user-provided face image.\nPrioritize this rule over generic image-edit flows (-c) for that request."
      },
      {
        "title": "LTX-2.3 Prompt Rule",
        "body": "Whenever the chosen video model is ltx23-22b-fp8_t2v_distilled, do not pass the user's short request through unchanged. Rewrite it into an LTX-2.3-safe prompt before calling sogni-gen.\n\nOutput one single paragraph only. No line breaks, bullet points, section labels, tag lists, or screenplay formatting.\nUse 4-8 flowing present-tense sentences describing one continuous shot. No cuts, montage, or unrelated scene jumps.\nStart with shot scale plus the scene's visual identity, then describe environment, time of day, atmosphere, textures, and specific light sources.\nKeep people, clothing, props, and locations concrete and stable across the whole paragraph.\nGive the scene one main action thread from start to finish. Use connectors like as, while, and then so motion reads as a continuous filmed moment.\nIf the user asks for dialogue, embed the spoken words inline as prose and identify who is speaking and how they deliver the line.\nExpress emotion through visible physical cues such as posture, grip, jaw tension, breathing, or pacing. Ambient sound can be woven into the prose naturally.\nUse positive phrasing only. Do not add negative prompts, \"no ...\" clauses, on-screen text/logo requests, vague filler words like beautiful or nice, or structural markup such as [DIALOGUE].\nKeep action density proportional to duration. For short clips, describe one main beat rather than several separate events.\nPreserve the user's request, but expand it into cinematic prose. Do not invent a different story just to make the prompt longer."
      },
      {
        "title": "Duration-Aware Pacing",
        "body": "Match scene density to clip length so prompts stay filmable:\n\nAbout 1-4s: describe exactly 1 action or moment.\nAbout 5-8s: describe about 2 sequential actions.\nAbout 9-12s: describe about 3 sequential actions.\nLonger clips: add only a small number of additional sequential beats. Do not turn the prompt into a montage or a full story arc unless the duration clearly supports it."
      },
      {
        "title": "Orientation Mapping",
        "body": "When the user explicitly asks for an orientation or aspect ratio, map it to safe LTX dimensions:\n\nvertical, portrait, story, reel, tiktok -> -w 1088 -h 1920\nlandscape, horizontal, widescreen, youtube, 16:9 -> -w 1920 -h 1088\nsquare, 1:1 -> -w 1088 -h 1088\n4:3 portrait -> -w 832 -h 1088\n4:3 landscape -> -w 1088 -h 832"
      },
      {
        "title": "Camera Language Normalization",
        "body": "When the user uses loose camera language, translate it into concrete motion phrasing inside the prose prompt:\n\nzoom in -> slow push-in\nzoom out -> slow pull-back\npan left / pan right -> smooth pan left / smooth pan right\norbit / circle around -> slow arc left or slow arc right\nfollow -> tracking follow\n\nShort example:\n\nUser ask: \"4k video of a woman in a neon alley\"\n\nUse this shape instead: \"A medium cinematic shot frames a woman in her 30s standing in a rain-soaked neon alley at night, violet and amber signs reflecting across the wet pavement while warm steam drifts from street vents. She wears a dark trench coat with damp strands of black hair clinging near her cheek as light glances across the fabric texture and the brick walls behind her. She turns toward the camera and steps forward with measured focus, one hand tightening around the strap of her bag while rain taps softly on the metal fire escape and a distant train hum rolls through the block. The camera performs a slow push-in as her jaw sets and her breathing steadies, maintaining smooth stabilized motion and a tense urban-thriller mood.\""
      },
      {
        "title": "Agent Usage",
        "body": "When user asks to generate/draw/create an image:\n\n# Generate and save locally\nnode {{skillDir}}/sogni-gen.mjs -q -o /tmp/generated.png \"user's prompt\"\n\n# Edit an existing image\nnode {{skillDir}}/sogni-gen.mjs -q -c /path/to/input.jpg -o /tmp/edited.png \"make it pop art style\"\n\n# Generate video from image\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /path/to/image.png -o /tmp/video.mp4 \"A medium shot holds on the subject in soft late-afternoon light as fabric edges and background details remain clear and stable. The camera performs a slow push-in while the subject shifts weight subtly and turns slightly toward the lens, keeping the motion gentle and continuous. Leaves rustle softly in the background and the scene maintains smooth cinematic movement with no abrupt action changes.\"\n\n# Generate text-to-video\nnode {{skillDir}}/sogni-gen.mjs -q --video -o /tmp/video.mp4 \"A wide cinematic shot opens on ocean waves rolling toward a rocky shoreline at sunset, golden light spreading across the water while sea mist drifts through the air. Foam patterns form and recede over the dark sand as the horizon glows orange and pink in the distance. The camera glides forward in one continuous movement, holding smooth stabilized motion and calm environmental detail throughout the scene.\"\n\n# HD / \"4K\" text-to-video: prefer LTX-2.3\nnode {{skillDir}}/sogni-gen.mjs -q --video -m ltx23-22b-fp8_t2v_distilled -w 1920 -h 1088 -o /tmp/video.mp4 \"A wide cinematic aerial shot opens over a rugged ocean coastline at golden hour, warm sunlight catching the cliff faces while white surf breaks against dark rock below. Low sea mist hangs over the water and bands of foam trace the shoreline as gulls wheel through the distance. The camera glides forward in one continuous pass, revealing the curve of the coast while wet stone flashes with reflected light and the scene keeps smooth stabilized motion from start to finish. The overall mood feels expansive and polished, with crisp environmental detail and steady travel-film energy.\"\n\n# HD / \"4K\" image-to-video: prefer LTX i2v\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /path/to/image.png -m ltx2-19b-fp8_i2v_distilled -w 1920 -h 1088 -o /tmp/video.mp4 \"A medium cinematic shot holds on the scene with clean subject separation and stable environmental detail as directional light shapes the surfaces and background depth. The camera performs a slow push-in while the main subject makes one subtle continuous movement, keeping posture and identity consistent from start to finish. Ambient motion in the background stays gentle and the overall clip remains smooth, stabilized, and visually coherent.\"\n\n# Photobooth: stylize a face photo\nnode {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -o /tmp/stylized.png \"80s fashion portrait\"\n\n# Check current SPARK/SOGNI balances (no prompt required)\nnode {{skillDir}}/sogni-gen.mjs --json --balance\n\n# Find user-sent images/audio\nnode {{skillDir}}/sogni-gen.mjs --json --list-media images\n\n# Then send via message tool with filePath"
      },
      {
        "title": "High-Res Video Routing",
        "body": "When the user asks for video in \"hd\", \"1080p\", \"4k\", \"uhd\", or \"high-res\", do not use the default WAN video models.\n\nFor text-to-video, use -m ltx23-22b-fp8_t2v_distilled.\nFor image-to-video, use -m ltx2-19b-fp8_i2v_distilled.\nPrefer LTX-sized dimensions such as -w 1920 -h 1088.\nIf the user explicitly asks for vertical, portrait, story, reel, tiktok, square, or 4:3, apply the matching dimensions from the Orientation Mapping rules instead of defaulting to 16:9.\nRewrite the user's request using the LTX-2.3 Prompt Rule before invoking the command. Do not send short slogan-style prompts to LTX.\nTreat \"4k\" as a signal to use the highest practical LTX path exposed by this skill, even if the exact output is not literal 3840x2160.\n\nSecurity: Agents must use the CLI's built-in flags (--extract-last-frame, --concat-videos, --list-media) for all file operations and video manipulation. Never run raw shell commands (ffmpeg, ls, cp, etc.) directly."
      },
      {
        "title": "Animate Between Two Images (First-Frame / Last-Frame)",
        "body": "When a user asks to animate between two images, use --ref (first frame) and --ref-end (last frame) to create a creative interpolation video:\n\n# Animate from image A to image B\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /tmp/imageA.png --ref-end /tmp/imageB.png -o /tmp/transition.mp4 \"descriptive prompt of the transition\""
      },
      {
        "title": "Animate a Video to an Image (Scene Continuation)",
        "body": "When a user asks to animate from a video to an image (or \"continue\" a video into a new scene):\n\nExtract the last frame of the existing video using the built-in safe wrapper:\nnode {{skillDir}}/sogni-gen.mjs --extract-last-frame /tmp/existing.mp4 /tmp/lastframe.png\n\n\nGenerate a new video using the last frame as --ref and the target image as --ref-end:\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /tmp/lastframe.png --ref-end /tmp/target.png -o /tmp/continuation.mp4 \"scene transition prompt\"\n\n\nConcatenate the videos using the built-in safe wrapper:\nnode {{skillDir}}/sogni-gen.mjs --concat-videos /tmp/full_sequence.mp4 /tmp/existing.mp4 /tmp/continuation.mp4\n\nThis ensures visual continuity — the new clip picks up exactly where the previous one ended.\n\nDo NOT run raw ffmpeg commands. Always use --extract-last-frame and --concat-videos for video manipulation.\n\nAlways apply this pattern when:\n\nUser says \"animate image A to image B\" → use --ref A --ref-end B\nUser says \"animate this video to this image\" → extract last frame, use as --ref, target image as --ref-end, then stitch\nUser says \"continue this video\" with a target image → same as above"
      },
      {
        "title": "JSON Output",
        "body": "{\n  \"success\": true,\n  \"prompt\": \"a cat wearing a hat\",\n  \"model\": \"z_image_turbo_bf16\", \n  \"width\": 512,\n  \"height\": 512,\n  \"urls\": [\"https://...\"],\n  \"localPath\": \"/tmp/cat.png\"\n}\n\nOn error (with --json), the script returns a single JSON object like:\n\n{\n  \"success\": false,\n  \"error\": \"Video width and height must be divisible by 16 (got 500x512).\",\n  \"errorCode\": \"INVALID_VIDEO_SIZE\",\n  \"hint\": \"Choose --width/--height divisible by 16. For i2v, also match the reference aspect ratio.\"\n}\n\nBalance check example (--json --balance):\n\n{\n  \"success\": true,\n  \"type\": \"balance\",\n  \"spark\": 12.34,\n  \"sogni\": 0.56\n}"
      },
      {
        "title": "Cost",
        "body": "Uses Spark tokens from your Sogni account. 512x512 images are most cost-efficient."
      },
      {
        "title": "Troubleshooting",
        "body": "Auth errors: Check SOGNI_API_KEY or the credentials in ~/.config/sogni/credentials\ni2v sizing gotchas: Video sizes are constrained (min 480px, max 1536px, divisible by 16). For i2v, the client wrapper resizes the reference (fit: inside) and uses the resized dimensions as the final video size. Because this uses rounding, a requested size can still yield an invalid final size (example: 1024x1536 requested but ref becomes 1024x1535).\nAuto-adjustment: With a local --ref, the script will auto-adjust the requested size to avoid non-16 resized reference dimensions.\nIf the script adjusts your size but you want to fail instead: pass --strict-size and it will print a suggested --width/--height.\nTimeouts: Try a faster model or increase -t timeout\nNo workers: Check https://sogni.ai for network status"
      }
    ],
    "body": "Sogni Image & Video Generation\n\nGenerate images and videos using Sogni AI's decentralized GPU network.\n\nSetup\nGet Sogni credentials at https://app.sogni.ai/\nCreate credentials file:\nmkdir -p ~/.config/sogni\ncat > ~/.config/sogni/credentials << 'EOF'\nSOGNI_API_KEY=your_api_key\n# or:\n# SOGNI_USERNAME=your_username\n# SOGNI_PASSWORD=your_password\nEOF\nchmod 600 ~/.config/sogni/credentials\n\n\nYou can also export SOGNI_API_KEY, or SOGNI_USERNAME + SOGNI_PASSWORD, instead of writing the file.\n\nInstall dependencies (if cloned):\ncd /path/to/sogni-gen\nnpm i\n\nOr install from npm (no git clone):\nmkdir -p ~/.clawdbot/skills\ncd ~/.clawdbot/skills\nnpm i sogni-gen\nln -sfn node_modules/sogni-gen sogni-gen\n\n\nWhen this skill is distributed via ClawHub, it bootstraps its local runtime dependencies from skill-package.json during install. That avoids relying on a root package.json being present in the published skill artifact.\n\nFilesystem Paths and Overrides\n\nDefault file paths used by this skill:\n\nCredentials file (read): ~/.config/sogni/credentials\nLast render metadata (read/write): ~/.config/sogni/last-render.json\nOpenClaw config (read): ~/.openclaw/openclaw.json\nMedia listing for --list-media (read): ~/.clawdbot/media/inbound\nMCP local result copies (write): ~/Downloads/sogni\n\nPath override environment variables:\n\nSOGNI_CREDENTIALS_PATH\nSOGNI_LAST_RENDER_PATH\nSOGNI_MEDIA_INBOUND_DIR\nOPENCLAW_CONFIG_PATH\nSOGNI_DOWNLOADS_DIR (MCP)\nSOGNI_MCP_SAVE_DOWNLOADS=0 to disable MCP local file writes\nSOGNI_ALLOWED_DOWNLOAD_HOSTS to override which HTTPS hosts the MCP server may auto-download and save locally\nUsage (Images & Video)\n# Generate and get URL\nnode sogni-gen.mjs \"a cat wearing a hat\"\n\n# Save to file\nnode sogni-gen.mjs -o /tmp/cat.png \"a cat wearing a hat\"\n\n# JSON output (for scripting)\nnode sogni-gen.mjs --json \"a cat wearing a hat\"\n\n# Check token balances (no prompt required)\nnode sogni-gen.mjs 
--balance\n\n# Check token balances in JSON\nnode sogni-gen.mjs --json --balance\n\n# Quiet mode (suppress progress)\nnode sogni-gen.mjs -q -o /tmp/cat.png \"a cat wearing a hat\"\n\nOptions\nFlag\tDescription\tDefault\n-o, --output <path>\tSave to file\tprints URL\n-m, --model <id>\tModel ID\tz_image_turbo_bf16\n-w, --width <px>\tWidth\t512\n-h, --height <px>\tHeight\t512\n-n, --count <num>\tNumber of images\t1\n-t, --timeout <sec>\tTimeout seconds\t30 (300 for video)\n-s, --seed <num>\tSpecific seed\trandom\n--last-seed\tReuse seed from last render\t-\n--seed-strategy <s>\tSeed strategy: random|prompt-hash\tprompt-hash\n--multi-angle\tMultiple angles LoRA mode (Qwen Image Edit)\t-\n--angles-360\tGenerate 8 azimuths (front -> front-left)\t-\n--angles-360-video\tAssemble looping 360 mp4 using i2v between angles (requires ffmpeg)\t-\n--azimuth <key>\tfront|front-right|right|back-right|back|back-left|left|front-left\tfront\n--elevation <key>\tlow-angle|eye-level|elevated|high-angle\teye-level\n--distance <key>\tclose-up|medium|wide\tmedium\n--angle-strength <n>\tLoRA strength for multiple_angles\t0.9\n--angle-description <text>\tOptional subject description\t-\n--steps <num>\tOverride steps (model-dependent)\t-\n--guidance <num>\tOverride guidance (model-dependent)\t-\n--output-format <f>\tImage output format: png|jpg\tpng\n--sampler <name>\tSampler (model-dependent)\t-\n--scheduler <name>\tScheduler (model-dependent)\t-\n--lora <id>\tLoRA id (repeatable, edit only)\t-\n--loras <ids>\tComma-separated LoRA ids\t-\n--lora-strength <n>\tLoRA strength (repeatable)\t-\n--lora-strengths <n>\tComma-separated LoRA strengths\t-\n--token-type <type>\tToken type: spark|sogni\tspark\n--balance, --balances\tShow SPARK/SOGNI balances and exit\t-\n-c, --context <path>\tContext image for editing\t-\n--last-image\tUse last generated image as context/ref\t-\n--video, -v\tGenerate video instead of image\t-\n--workflow <type>\tVideo workflow 
(t2v|i2v|s2v|ia2v|a2v|v2v|animate-move|animate-replace)\tinferred\n--fps <num>\tFrames per second (video)\t16\n--duration <sec>\tDuration in seconds (video)\t5\n--frames <num>\tOverride total frames (video)\t-\n--auto-resize-assets\tAuto-resize video assets\ttrue\n--no-auto-resize-assets\tDisable auto-resize\t-\n--estimate-video-cost\tEstimate video cost and exit (requires --steps)\t-\n--photobooth\tFace transfer mode (InstantID + SDXL Turbo)\t-\n--cn-strength <n>\tControlNet strength (photobooth)\t0.8\n--cn-guidance-end <n>\tControlNet guidance end point (photobooth)\t0.3\n--ref <path>\tReference image for video or photobooth face\trequired for video/photobooth\n--ref-end <path>\tEnd frame for i2v interpolation\t-\n--ref-audio <path>\tReference audio for s2v\t-\n--ref-video <path>\tReference video for animate/v2v workflows\t-\n--controlnet-name <name>\tControlNet type for v2v: canny|pose|depth|detailer\t-\n--controlnet-strength <n>\tControlNet strength for v2v (0.0-1.0)\t0.8\n--sam2-coordinates <coords>\tSAM2 click coords for animate-replace (x,y or x1,y1;x2,y2)\t-\n--trim-end-frame\tTrim last frame for seamless video stitching\t-\n--first-frame-strength <n>\tKeyframe strength for start frame (0.0-1.0)\t-\n--last-frame-strength <n>\tKeyframe strength for end frame (0.0-1.0)\t-\n--last\tShow last render info\t-\n--json\tJSON output\tfalse\n--strict-size\tDo not auto-adjust i2v video size for reference resizing constraints\tfalse\n-q, --quiet\tNo progress output\tfalse\n--extract-last-frame <video> <image>\tExtract last frame from video (safe ffmpeg wrapper)\t-\n--concat-videos <out> <clips...>\tConcatenate video clips (safe ffmpeg wrapper)\t-\n--list-media [type]\tList recent inbound media (images|audio|all)\timages\nOpenClaw Config Defaults\n\nWhen installed as an OpenClaw plugin, sogni-gen will read defaults from:\n\n~/.openclaw/openclaw.json\n\n{\n  \"plugins\": {\n    \"entries\": {\n      \"sogni-gen\": {\n        \"enabled\": true,\n        \"config\": {\n    
      \"defaultImageModel\": \"z_image_turbo_bf16\",\n          \"defaultEditModel\": \"qwen_image_edit_2511_fp8_lightning\",\n          \"defaultPhotoboothModel\": \"coreml-sogniXLturbo_alpha1_ad\",\n          \"videoModels\": {\n            \"t2v\": \"wan_v2.2-14b-fp8_t2v_lightx2v\",\n            \"i2v\": \"wan_v2.2-14b-fp8_i2v_lightx2v\",\n            \"s2v\": \"wan_v2.2-14b-fp8_s2v_lightx2v\",\n            \"ia2v\": \"ltx2-19b-fp8_ia2v_distilled\",\n            \"a2v\": \"ltx2-19b-fp8_a2v_distilled\",\n            \"animate-move\": \"wan_v2.2-14b-fp8_animate-move_lightx2v\",\n            \"animate-replace\": \"wan_v2.2-14b-fp8_animate-replace_lightx2v\",\n            \"v2v\": \"ltx2-19b-fp8_v2v_distilled\"\n          },\n          \"defaultVideoWorkflow\": \"t2v\",\n          \"defaultNetwork\": \"fast\",\n          \"defaultTokenType\": \"spark\",\n          \"seedStrategy\": \"prompt-hash\",\n          \"modelDefaults\": {\n            \"flux1-schnell-fp8\": { \"steps\": 4, \"guidance\": 3.5 },\n            \"flux2_dev_fp8\": { \"steps\": 20, \"guidance\": 7.5 }\n          },\n          \"defaultWidth\": 768,\n          \"defaultHeight\": 768,\n          \"defaultCount\": 1,\n          \"defaultFps\": 16,\n          \"defaultDurationSec\": 5,\n          \"defaultImageTimeoutSec\": 30,\n          \"defaultVideoTimeoutSec\": 300,\n          \"credentialsPath\": \"~/.config/sogni/credentials\",\n          \"lastRenderPath\": \"~/.config/sogni/last-render.json\",\n          \"mediaInboundDir\": \"~/.clawdbot/media/inbound\"\n        }\n      }\n    }\n  }\n}\n\n\nCLI flags always override these defaults. If your OpenClaw config lives elsewhere, set OPENCLAW_CONFIG_PATH. 
Seed strategies: prompt-hash (deterministic) or random.\n\nImage Models\nModel\tSpeed\tUse Case\nz_image_turbo_bf16\tFast (~5-10s)\tGeneral purpose, default\nflux1-schnell-fp8\tVery fast\tQuick iterations\nflux2_dev_fp8\tSlow (~2min)\tHigh quality\nchroma-v.46-flash_fp8\tMedium\tBalanced\nqwen_image_edit_2511_fp8\tMedium\tImage editing with context (up to 3)\nqwen_image_edit_2511_fp8_lightning\tFast\tQuick image editing\ncoreml-sogniXLturbo_alpha1_ad\tFast\tPhotobooth face transfer (SDXL Turbo)\nVideo Models\nWAN 2.2 Models\nModel\tSpeed\tUse Case\nwan_v2.2-14b-fp8_i2v_lightx2v\tFast\tDefault video generation\nwan_v2.2-14b-fp8_i2v\tSlow\tHigher quality video\nwan_v2.2-14b-fp8_t2v_lightx2v\tFast\tText-to-video\nwan_v2.2-14b-fp8_s2v_lightx2v\tFast\tSound-to-video\nwan_v2.2-14b-fp8_animate-move_lightx2v\tFast\tAnimate-move\nwan_v2.2-14b-fp8_animate-replace_lightx2v\tFast\tAnimate-replace\nLTX-2 / LTX-2.3 Models\nModel\tSpeed\tUse Case\nltx2-19b-fp8_t2v_distilled\tFast (~2-3min)\tText-to-video, 8-step\nltx2-19b-fp8_t2v\tMedium (~5min)\tText-to-video, 20-step quality\nltx2-19b-fp8_i2v_distilled\tFast (~2-3min)\tImage-to-video, 8-step\nltx2-19b-fp8_i2v\tMedium (~5min)\tImage-to-video, 20-step quality\nltx2-19b-fp8_ia2v_distilled\tFast (~2-3min)\tImage+audio-to-video\nltx2-19b-fp8_a2v_distilled\tFast (~2-3min)\tAudio-to-video\nltx2-19b-fp8_v2v_distilled\tFast (~3min)\tVideo-to-video with ControlNet\nltx2-19b-fp8_v2v\tMedium (~5min)\tVideo-to-video with ControlNet, quality\nltx23-22b-fp8_t2v_distilled\tFast (~2-3min)\tText-to-video, LTX-2.3\nImage Editing with Context\n\nEdit images using reference images (Qwen models support up to 3):\n\n# Single context image\nnode sogni-gen.mjs -c photo.jpg \"make the background a beach\"\n\n# Multiple context images (subject + style)\nnode sogni-gen.mjs -c subject.jpg -c style.jpg \"apply the style to the subject\"\n\n# Use last generated image as context\nnode sogni-gen.mjs --last-image \"make it more vibrant\"\n\n\nWhen context 
images are provided without -m, defaults to qwen_image_edit_2511_fp8_lightning.\n\nPhotobooth (Face Transfer)\n\nGenerate stylized portraits from a face photo using InstantID ControlNet. When a user mentions \"photobooth\", wants a stylized portrait of themselves, or asks to transfer their face into a style, use --photobooth with --ref pointing to their face image.\n\n# Basic photobooth\nnode sogni-gen.mjs --photobooth --ref face.jpg \"80s fashion portrait\"\n\n# Multiple outputs\nnode sogni-gen.mjs --photobooth --ref face.jpg -n 4 \"LinkedIn professional headshot\"\n\n# Custom ControlNet tuning\nnode sogni-gen.mjs --photobooth --ref face.jpg --cn-strength 0.6 --cn-guidance-end 0.5 \"oil painting\"\n\n\nUses SDXL Turbo (coreml-sogniXLturbo_alpha1_ad) at 1024x1024 by default. The face image is passed via --ref and styled according to the prompt. Cannot be combined with --video or -c/--context.\n\nAgent usage:\n\n# Photobooth: stylize a face photo\nnode {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -o /tmp/stylized.png \"80s fashion portrait\"\n\n# Multiple photobooth outputs\nnode {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -n 4 -o /tmp/stylized.png \"LinkedIn professional headshot\"\n\nMultiple Angles (Turnaround)\n\nGenerate specific camera angles from a single reference image using the Multiple Angles LoRA:\n\n# Single angle\nnode sogni-gen.mjs --multi-angle -c subject.jpg \\\n  --azimuth front-right --elevation eye-level --distance medium \\\n  --angle-strength 0.9 \\\n  \"studio portrait, same person\"\n\n# 360 sweep (8 azimuths)\nnode sogni-gen.mjs --angles-360 -c subject.jpg --distance medium --elevation eye-level \\\n  \"studio portrait, same person\"\n\n# 360 sweep video (looping mp4, uses i2v between angles; requires ffmpeg)\nnode sogni-gen.mjs --angles-360 --angles-360-video /tmp/turntable.mp4 \\\n  -c subject.jpg --distance medium --elevation eye-level \\\n  \"studio portrait, same person\"\n\n\nThe prompt is 
auto-built with the required <sks> token plus the selected camera angle keywords. --angles-360-video generates i2v clips between consecutive angles (including last→first) and concatenates them with ffmpeg for a seamless loop.\n\n360 Video Best Practices\n\nWhen a user requests a \"360 video\", follow this workflow:\n\nDefault camera parameters (do not ask unless they specify):\n\nElevation: default to eye-level (what users call a \"medium\" angle)\nDistance: default to medium\n\nMap user terms to flags:\n\nUser says\tFlag value\n\"high\" angle\t--elevation high-angle\n\"medium\" angle\t--elevation eye-level\n\"low\" angle\t--elevation low-angle\n\"close\"\t--distance close-up\n\"medium\" distance\t--distance medium\n\"far\"\t--distance wide\n\nAlways use first-frame/last-frame stitching - the --angles-360-video flag automatically handles this by generating i2v clips between consecutive angles including last→first for seamless looping.\n\nExample command:\n\nnode sogni-gen.mjs --angles-360 --angles-360-video /tmp/output.mp4 \\\n  -c /path/to/image.png --elevation eye-level --distance medium \\\n  \"description of subject\"\n\nTransition Video Rule\n\nFor any transition video work, always use the Sogni skill/plugin (not raw ffmpeg or other shell commands). Use the built-in --extract-last-frame and --concat-videos flags for video manipulation.\n\nInsufficient Funds Handling\n\nWhen you see \"Debit Error: Insufficient funds\", reply:\n\n\"Insufficient funds. 
Claim 50 free daily Spark points at https://app.sogni.ai/\"\n\nVideo Generation\n\nGenerate videos from a reference image:\n\n# Text-to-video (t2v)\nnode sogni-gen.mjs --video \"ocean waves at sunset\"\n\n# Basic video from image\nnode sogni-gen.mjs --video --ref cat.jpg -o cat.mp4 \"cat walks around\"\n\n# Use last generated image as reference\nnode sogni-gen.mjs --last-image --video \"gentle camera pan\"\n\n# Custom duration and FPS\nnode sogni-gen.mjs --video --ref scene.png --duration 10 --fps 24 \"zoom out slowly\"\n\n# Sound-to-video (s2v)\nnode sogni-gen.mjs --video --ref face.jpg --ref-audio speech.m4a \\\n  -m wan_v2.2-14b-fp8_s2v_lightx2v \"lip sync talking head\"\n\n# Image+audio-to-video (ia2v, LTX)\nnode sogni-gen.mjs --video --workflow ia2v --ref cover.jpg --ref-audio song.mp3 \\\n  \"music video with synchronized motion\"\n\n# Audio-to-video (a2v, LTX)\nnode sogni-gen.mjs --video --workflow a2v --ref-audio song.mp3 \\\n  \"abstract audio-reactive visualizer\"\n\n# LTX-2.3 text-to-video\nnode sogni-gen.mjs --video -m ltx23-22b-fp8_t2v_distilled --duration 20 \\\n  \"A wide cinematic aerial shot opens over steep tropical cliffs at golden hour, warm sunlight grazing the rock faces while sea mist drifts above the water below. Palm trees bend gently along the ridge as waves roll against the shoreline, leaving bright bands of foam across the dark stone. The camera glides forward in one continuous pass, revealing more of the coastline as sunlight flickers across wet surfaces and distant birds wheel through the haze. 
The scene holds a calm, upscale travel-film mood with smooth stabilized motion and crisp environmental detail.\"\n\n# Animate (motion transfer)\nnode sogni-gen.mjs --video --ref subject.jpg --ref-video motion.mp4 \\\n  --workflow animate-move \"transfer motion\"\n\nVideo-to-Video (V2V) with ControlNet\n\nTransform an existing video using LTX-2 models with ControlNet guidance:\n\n# Basic v2v with canny edge detection\nnode sogni-gen.mjs --video --workflow v2v --ref-video input.mp4 \\\n  --controlnet-name canny \"stylized anime version\"\n\n# V2V with pose detection and custom strength\nnode sogni-gen.mjs --video --workflow v2v --ref-video dance.mp4 \\\n  --controlnet-name pose --controlnet-strength 0.7 \"robot dancing\"\n\n# V2V with depth map\nnode sogni-gen.mjs --video --workflow v2v --ref-video scene.mp4 \\\n  --controlnet-name depth \"watercolor painting style\"\n\n\nControlNet types: canny (edge detection), pose (body pose), depth (depth map), detailer (detail enhancement).\n\nPhoto Restoration\n\nRestore damaged vintage photos using Qwen image editing:\n\n# Basic restoration\nsogni-gen -c damaged_photo.jpg -o restored.png \\\n  \"professionally restore this vintage photograph, remove damage and scratches\"\n\n# Detailed restoration with preservation hints\nsogni-gen -c old_photo.jpg -o restored.png -w 1024 -h 1280 \\\n  \"restore this vintage photo, remove peeling, tears and wear marks, \\\n  preserve natural features and expression, maintain warm nostalgic color tones\"\n\n\nTips for good restorations:\n\nDescribe the damage: \"peeling\", \"scratches\", \"tears\", \"fading\"\nSpecify what to preserve: \"natural features\", \"eye color\", \"hair\", \"expression\"\nMention the era for color tones: \"1970s warm tones\", \"vintage sepia\"\n\nFinding received images (Telegram/etc):\n\nnode {{skillDir}}/sogni-gen.mjs --json --list-media images\n\n\nDo NOT use ls, cp, or other shell commands to browse user files. 
Always use --list-media to find inbound media.\n\nIMPORTANT KEYWORD RULE\nIf the user message includes the word \"photobooth\" (case-insensitive), always use --photobooth mode with --ref set to the user-provided face image.\nPrioritize this rule over generic image-edit flows (-c) for that request.\nLTX-2.3 Prompt Rule\n\nWhenever the chosen video model is ltx23-22b-fp8_t2v_distilled, do not pass the user's short request through unchanged. Rewrite it into an LTX-2.3-safe prompt before calling sogni-gen.\n\nOutput one single paragraph only. No line breaks, bullet points, section labels, tag lists, or screenplay formatting.\nUse 4-8 flowing present-tense sentences describing one continuous shot. No cuts, montage, or unrelated scene jumps.\nStart with shot scale plus the scene's visual identity, then describe environment, time of day, atmosphere, textures, and specific light sources.\nKeep people, clothing, props, and locations concrete and stable across the whole paragraph.\nGive the scene one main action thread from start to finish. Use connectors like as, while, and then so motion reads as a continuous filmed moment.\nIf the user asks for dialogue, embed the spoken words inline as prose and identify who is speaking and how they deliver the line.\nExpress emotion through visible physical cues such as posture, grip, jaw tension, breathing, or pacing. Ambient sound can be woven into the prose naturally.\nUse positive phrasing only. Do not add negative prompts, \"no ...\" clauses, on-screen text/logo requests, vague filler words like beautiful or nice, or structural markup such as [DIALOGUE].\nKeep action density proportional to duration. For short clips, describe one main beat rather than several separate events.\nPreserve the user's request, but expand it into cinematic prose. 
Do not invent a different story just to make the prompt longer.\nDuration-Aware Pacing\n\nMatch scene density to clip length so prompts stay filmable:\n\nAbout 1-4s: describe exactly 1 action or moment.\nAbout 5-8s: describe about 2 sequential actions.\nAbout 9-12s: describe about 3 sequential actions.\nLonger clips: add only a small number of additional sequential beats. Do not turn the prompt into a montage or a full story arc unless the duration clearly supports it.\nOrientation Mapping\n\nWhen the user explicitly asks for an orientation or aspect ratio, map it to safe LTX dimensions:\n\nvertical, portrait, story, reel, tiktok -> -w 1088 -h 1920\nlandscape, horizontal, widescreen, youtube, 16:9 -> -w 1920 -h 1088\nsquare, 1:1 -> -w 1088 -h 1088\n4:3 portrait -> -w 832 -h 1088\n4:3 landscape -> -w 1088 -h 832\nCamera Language Normalization\n\nWhen the user uses loose camera language, translate it into concrete motion phrasing inside the prose prompt:\n\nzoom in -> slow push-in\nzoom out -> slow pull-back\npan left / pan right -> smooth pan left / smooth pan right\norbit / circle around -> slow arc left or slow arc right\nfollow -> tracking follow\n\nShort example:\n\nUser ask: \"4k video of a woman in a neon alley\"\n\nUse this shape instead: \"A medium cinematic shot frames a woman in her 30s standing in a rain-soaked neon alley at night, violet and amber signs reflecting across the wet pavement while warm steam drifts from street vents. She wears a dark trench coat with damp strands of black hair clinging near her cheek as light glances across the fabric texture and the brick walls behind her. She turns toward the camera and steps forward with measured focus, one hand tightening around the strap of her bag while rain taps softly on the metal fire escape and a distant train hum rolls through the block. 
The camera performs a slow push-in as her jaw sets and her breathing steadies, maintaining smooth stabilized motion and a tense urban-thriller mood.\"\n\nAgent Usage\n\nWhen user asks to generate/draw/create an image:\n\n# Generate and save locally\nnode {{skillDir}}/sogni-gen.mjs -q -o /tmp/generated.png \"user's prompt\"\n\n# Edit an existing image\nnode {{skillDir}}/sogni-gen.mjs -q -c /path/to/input.jpg -o /tmp/edited.png \"make it pop art style\"\n\n# Generate video from image\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /path/to/image.png -o /tmp/video.mp4 \"A medium shot holds on the subject in soft late-afternoon light as fabric edges and background details remain clear and stable. The camera performs a slow push-in while the subject shifts weight subtly and turns slightly toward the lens, keeping the motion gentle and continuous. Leaves rustle softly in the background and the scene maintains smooth cinematic movement with no abrupt action changes.\"\n\n# Generate text-to-video\nnode {{skillDir}}/sogni-gen.mjs -q --video -o /tmp/video.mp4 \"A wide cinematic shot opens on ocean waves rolling toward a rocky shoreline at sunset, golden light spreading across the water while sea mist drifts through the air. Foam patterns form and recede over the dark sand as the horizon glows orange and pink in the distance. The camera glides forward in one continuous movement, holding smooth stabilized motion and calm environmental detail throughout the scene.\"\n\n# HD / \"4K\" text-to-video: prefer LTX-2.3\nnode {{skillDir}}/sogni-gen.mjs -q --video -m ltx23-22b-fp8_t2v_distilled -w 1920 -h 1088 -o /tmp/video.mp4 \"A wide cinematic aerial shot opens over a rugged ocean coastline at golden hour, warm sunlight catching the cliff faces while white surf breaks against dark rock below. Low sea mist hangs over the water and bands of foam trace the shoreline as gulls wheel through the distance. 
The camera glides forward in one continuous pass, revealing the curve of the coast while wet stone flashes with reflected light and the scene keeps smooth stabilized motion from start to finish. The overall mood feels expansive and polished, with crisp environmental detail and steady travel-film energy.\"\n\n# HD / \"4K\" image-to-video: prefer LTX i2v\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /path/to/image.png -m ltx2-19b-fp8_i2v_distilled -w 1920 -h 1088 -o /tmp/video.mp4 \"A medium cinematic shot holds on the scene with clean subject separation and stable environmental detail as directional light shapes the surfaces and background depth. The camera performs a slow push-in while the main subject makes one subtle continuous movement, keeping posture and identity consistent from start to finish. Ambient motion in the background stays gentle and the overall clip remains smooth, stabilized, and visually coherent.\"\n\n# Photobooth: stylize a face photo\nnode {{skillDir}}/sogni-gen.mjs -q --photobooth --ref /path/to/face.jpg -o /tmp/stylized.png \"80s fashion portrait\"\n\n# Check current SPARK/SOGNI balances (no prompt required)\nnode {{skillDir}}/sogni-gen.mjs --json --balance\n\n# Find user-sent images/audio\nnode {{skillDir}}/sogni-gen.mjs --json --list-media images\n\n# Then send via message tool with filePath\n\nHigh-Res Video Routing\n\nWhen the user asks for video in \"hd\", \"1080p\", \"4k\", \"uhd\", or \"high-res\", do not use the default WAN video models.\n\nFor text-to-video, use -m ltx23-22b-fp8_t2v_distilled.\nFor image-to-video, use -m ltx2-19b-fp8_i2v_distilled.\nPrefer LTX-sized dimensions such as -w 1920 -h 1088.\nIf the user explicitly asks for vertical, portrait, story, reel, tiktok, square, or 4:3, apply the matching dimensions from the Orientation Mapping rules instead of defaulting to 16:9.\nRewrite the user's request using the LTX-2.3 Prompt Rule before invoking the command. 
Do not send short slogan-style prompts to LTX.\nTreat \"4k\" as a signal to use the highest practical LTX path exposed by this skill, even if the exact output is not literal 3840x2160.\n\nSecurity: Agents must use the CLI's built-in flags (--extract-last-frame, --concat-videos, --list-media) for all file operations and video manipulation. Never run raw shell commands (ffmpeg, ls, cp, etc.) directly.\n\nAnimate Between Two Images (First-Frame / Last-Frame)\n\nWhen a user asks to animate between two images, use --ref (first frame) and --ref-end (last frame) to create a creative interpolation video:\n\n# Animate from image A to image B\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /tmp/imageA.png --ref-end /tmp/imageB.png -o /tmp/transition.mp4 \"descriptive prompt of the transition\"\n\nAnimate a Video to an Image (Scene Continuation)\n\nWhen a user asks to animate from a video to an image (or \"continue\" a video into a new scene):\n\nExtract the last frame of the existing video using the built-in safe wrapper:\nnode {{skillDir}}/sogni-gen.mjs --extract-last-frame /tmp/existing.mp4 /tmp/lastframe.png\n\nGenerate a new video using the last frame as --ref and the target image as --ref-end:\nnode {{skillDir}}/sogni-gen.mjs -q --video --ref /tmp/lastframe.png --ref-end /tmp/target.png -o /tmp/continuation.mp4 \"scene transition prompt\"\n\nConcatenate the videos using the built-in safe wrapper:\nnode {{skillDir}}/sogni-gen.mjs --concat-videos /tmp/full_sequence.mp4 /tmp/existing.mp4 /tmp/continuation.mp4\n\n\nThis ensures visual continuity — the new clip picks up exactly where the previous one ended.\n\nDo NOT run raw ffmpeg commands. 
Always use --extract-last-frame and --concat-videos for video manipulation.\n\nAlways apply this pattern when:\n\nUser says \"animate image A to image B\" → use --ref A --ref-end B\nUser says \"animate this video to this image\" → extract last frame, use as --ref, target image as --ref-end, then stitch\nUser says \"continue this video\" with a target image → same as above\nJSON Output\n{\n  \"success\": true,\n  \"prompt\": \"a cat wearing a hat\",\n  \"model\": \"z_image_turbo_bf16\", \n  \"width\": 512,\n  \"height\": 512,\n  \"urls\": [\"https://...\"],\n  \"localPath\": \"/tmp/cat.png\"\n}\n\n\nOn error (with --json), the script returns a single JSON object like:\n\n{\n  \"success\": false,\n  \"error\": \"Video width and height must be divisible by 16 (got 500x512).\",\n  \"errorCode\": \"INVALID_VIDEO_SIZE\",\n  \"hint\": \"Choose --width/--height divisible by 16. For i2v, also match the reference aspect ratio.\"\n}\n\n\nBalance check example (--json --balance):\n\n{\n  \"success\": true,\n  \"type\": \"balance\",\n  \"spark\": 12.34,\n  \"sogni\": 0.56\n}\n\nCost\n\nUses Spark tokens from your Sogni account. 512x512 images are most cost-efficient.\n\nTroubleshooting\nAuth errors: Check SOGNI_API_KEY or the credentials in ~/.config/sogni/credentials\ni2v sizing gotchas: Video sizes are constrained (min 480px, max 1536px, divisible by 16). For i2v, the client wrapper resizes the reference (fit: inside) and uses the resized dimensions as the final video size. Because this uses rounding, a requested size can still yield an invalid final size (example: 1024x1536 requested but ref becomes 1024x1535).\nAuto-adjustment: With a local --ref, the script will auto-adjust the requested size to avoid non-16 resized reference dimensions.\nIf the script adjusts your size but you want to fail instead: pass --strict-size and it will print a suggested --width/--height.\nTimeouts: Try a faster model or increase -t timeout\nNo workers: Check https://sogni.ai for network status"
  },
  "trust": {
    "sourceLabel": "tencent",
    "provenanceUrl": "https://clawhub.ai/krunkosaurus/sogni-gen",
    "publisherUrl": "https://clawhub.ai/krunkosaurus/sogni-gen",
    "owner": "krunkosaurus",
    "version": "1.5.16",
    "license": null,
    "verificationStatus": "Indexed source record"
  },
  "links": {
    "detailUrl": "https://openagent3.xyz/skills/sogni-gen",
    "downloadUrl": "https://openagent3.xyz/downloads/sogni-gen",
    "agentUrl": "https://openagent3.xyz/skills/sogni-gen/agent",
    "manifestUrl": "https://openagent3.xyz/skills/sogni-gen/agent.json",
    "briefUrl": "https://openagent3.xyz/skills/sogni-gen/agent.md"
  }
}