From 4ced37897ce01ab56b75d2d0403f0986cffd44aa Mon Sep 17 00:00:00 2001 From: Jobdori Date: Sun, 26 Apr 2026 04:17:24 +0900 Subject: [PATCH] =?UTF-8?q?roadmap:=20#227=20filed=20=E2=80=94=20Video-gen?= =?UTF-8?q?eration=20API=20typed=20taxonomy=20is=20structurally=20absent:?= =?UTF-8?q?=20zero=20`/v1/videos/generations`=20+=20zero=20`/v1/videos/edi?= =?UTF-8?q?ts`=20+=20zero=20`/v1/videos/extends`=20+=20zero=20`/v1/videos/?= =?UTF-8?q?{id}`=20polling-and-retrieval=20endpoint=20surface=20across=20b?= =?UTF-8?q?oth=20Anthropic-native=20and=20OpenAI-compat=20lanes,=20zero=20?= =?UTF-8?q?VideoGenerationRequest=20/=20VideoEditRequest=20/=20VideoExtend?= =?UTF-8?q?Request=20/=20VideoGenerationResponse=20/=20VideoObject=20/=20V?= =?UTF-8?q?ideoQuality=20/=20VideoResolution=20/=20VideoAspectRatio=20/=20?= =?UTF-8?q?VideoDuration=20/=20VideoOutputFormat=20/=20VideoFrameRate=20/?= =?UTF-8?q?=20VideoCodec=20/=20VideoStyle=20/=20VideoSource=20/=20VideoMed?= =?UTF-8?q?iaType=20/=20VideoTaskStatus=20/=20VideoTaskId=20typed=20model?= =?UTF-8?q?=20in=20rust/crates/api/src/types.rs,=20zero=20Video=20variant?= =?UTF-8?q?=20on=20OutputContentBlock=20(4-arm=20exhaustive:=20Text/ToolUs?= =?UTF-8?q?e/Thinking/RedactedThinking=20=E2=80=94=20extending=20#226's=20?= =?UTF-8?q?asymmetric-output-only=20modality=20axis=20with=20new=20tempora?= =?UTF-8?q?l-duration=20dimension),=20zero=20generate=5Fvideo=20/=20edit?= =?UTF-8?q?=5Fvideo=20/=20extend=5Fvideo=20/=20retrieve=5Fvideo=5Ftask=20m?= =?UTF-8?q?ethods=20on=20Provider=20trait=20at=20rust/crates/api/src/provi?= =?UTF-8?q?ders/mod.rs:17-30=20(only=20send=5Fmessage=20+=20stream=5Fmessa?= =?UTF-8?q?ge=20exist,=20both=20per-request=20synchronous=20and=20constrai?= =?UTF-8?q?ned=20to=20text-modality=20chat/completion=20taxonomy=20with=20?= =?UTF-8?q?zero=20video-output=20dispatch=20surface=20AND=20zero=20async-t?= =?UTF-8?q?ask=20polling=20primitive=20=E2=80=94=20the=20canonical=20video?= 
=?UTF-8?q?-generation=20pattern=20requires=20a=20two-phase=20request/poll?= =?UTF-8?q?=20workflow=20that=20the=20Provider=20trait=20does=20not=20expo?= =?UTF-8?q?se=20because=20every=20existing=20method=20returns=20a=20synchr?= =?UTF-8?q?onous=20response,=20distinct=20from=20#221's=20batch-dispatch?= =?UTF-8?q?=20async=20pattern=20which=20uses=20different=20polling=20shape?= =?UTF-8?q?=20with=20file-upload=20prerequisites=20that=20don't=20apply=20?= =?UTF-8?q?to=20video-gen),=20zero=20video-generation=20dispatch=20on=20Pr?= =?UTF-8?q?oviderClient=20enum=20at=20rust/crates/api/src/client.rs:8-14?= =?UTF-8?q?=20(three=20variants=20Anthropic/Xai/OpenAi,=20zero=20Sora/Veo/?= =?UTF-8?q?Pika/Runway/Luma/Mochi/Kling/Hailuo/Replicate/FalAi/BlackForest?= =?UTF-8?q?Labs/StabilityVideo=20partner-routing=20variants=20=E2=80=94=20?= =?UTF-8?q?twelve-plus-partner-set,=20the=20largest=20partner-set=20yet=20?= =?UTF-8?q?in=20the=20cluster=20surpassing=20#226's=20eight-plus-partner?= =?UTF-8?q?=20image-gen=20set=20because=20video-generation=20is=20the=20mo?= =?UTF-8?q?st-fragmented=20modality=20across=20third-party=20providers=20i?= =?UTF-8?q?n=202024-2026=20with=20every=20major=20lab=20shipping=20its=20o?= =?UTF-8?q?wn=20video-gen=20surface=20in=20the=20post-Sora-launch=20arms?= =?UTF-8?q?=20race),=20zero=20multipart/form-data=20upload=20affordance=20?= =?UTF-8?q?with=20reqwest::multipart=20feature=20flag=20absent=20from=20ru?= =?UTF-8?q?st/crates/api/Cargo.toml=20=E2=80=94=20multipart=20needed=20for?= =?UTF-8?q?=20/v1/videos/edits=20and=20/v1/videos/extends=20subset=20(para?= =?UTF-8?q?llel=20to=20#226's=20image-edits=20subset),=20zero=20async-task?= =?UTF-8?q?=20polling=20primitive=20in=20the=20runtime=20=E2=80=94=20there?= =?UTF-8?q?=20is=20no=20TaskPoller=20/=20AsyncTask=20/=20TaskStatus=20/=20?= =?UTF-8?q?TaskId=20/=20poll=5Ftask=5Funtil=5Fcomplete=20machinery=20anywh?= =?UTF-8?q?ere=20in=20rust/crates/runtime/=20(rg=20returns=20zero=20hits?= 
=?UTF-8?q?=20for=20task=5Fid/task=5Fstatus/polling/poll=5Ftask/async=5Fta?= =?UTF-8?q?sk/pending=5Ftask=20across=20rust/),=20distinguishing=20video-g?= =?UTF-8?q?eneration's=20async-polling=20pattern=20from=20every=20prior=20?= =?UTF-8?q?cluster=20member=20which=20is=20either=20synchronous=20(#211=20?= =?UTF-8?q?through=20#226=20except=20#221)=20or=20streaming-via-SSE=20(#22?= =?UTF-8?q?1=20batch-dispatch=20is=20closest,=20but=20uses=20different=20p?= =?UTF-8?q?olling=20shape=20with=20file-upload=20prerequisites),=20zero=20?= =?UTF-8?q?claw=20video=20/=20claw=20videos=20/=20claw=20generate-video=20?= =?UTF-8?q?/=20claw=20render-video=20CLI=20subcommand=20at=20rust/crates/r?= =?UTF-8?q?usty-claude-cli/src/main.rs,=20zero=20/sora=20/=20/veo=20/=20/v?= =?UTF-8?q?ideo=20/=20/render-video=20/=20/generate-video=20slash=20comman?= =?UTF-8?q?d=20in=20SlashCommandSpec=20table=20(zero=20video-related=20ent?= =?UTF-8?q?ries=20=E2=80=94=20video-input=20doubly=20absent=20because=20no?= =?UTF-8?q?=20advertised-but-unbuilt=20commands=20AND=20no=20implemented?= =?UTF-8?q?=20commands,=20strict-subset=20of=20#226's=20image-generation?= =?UTF-8?q?=20gap),=20zero=20sora-2=20/=20sora-2-pro=20/=20veo-3=20/=20veo?= =?UTF-8?q?-3-fast=20/=20runway-gen-4=20/=20luma-dream-machine=20/=20pika-?= =?UTF-8?q?2.0=20/=20kling-1.5=20/=20hailuo-i2v-01=20/=20hunyuan-video=20/?= =?UTF-8?q?=20mochi-1=20/=20cogvideox-5b=20/=20stable-video-diffusion-1.1?= =?UTF-8?q?=20entries=20in=20MODEL=5FREGISTRY,=20zero=20video=5Fper=5Fseco?= =?UTF-8?q?nd=5Fcost=5Fusd=20/=20video=5Fper=5Fmegapixel=5Fsecond=5Fcost?= =?UTF-8?q?=5Fusd=20/=20video=5Finput=5Ftoken=5Fcost=5Fper=5Fmillion=20/?= =?UTF-8?q?=20video=5Foutput=5Ftoken=5Fcost=5Fper=5Fmillion=20/=20video=5F?= =?UTF-8?q?per=5Fminute=5Fcost=5Fusd=20fields=20in=20ModelPricing=20struct?= =?UTF-8?q?=20(rust/crates/runtime/src/usage.rs:9-15=20has=20only=20four?= =?UTF-8?q?=20text-token-only=20fields)=20=E2=80=94=20the=20five-dimension?= 
=?UTF-8?q?al=20pricing=20matrix=20(model=20=C3=97=20resolution=20=C3=97?= =?UTF-8?q?=20fps=20=C3=97=20duration=20=C3=97=20extension-vs-generation?= =?UTF-8?q?=20compound-cost)=20is=20the=20largest=20pricing-tier=20extensi?= =?UTF-8?q?on=20yet=20catalogued,=20exceeding=20#226's=20four-dimensional?= =?UTF-8?q?=20image=20matrix,=20zero=20video-gen-model=20recognition=20in?= =?UTF-8?q?=20pricing=5Ffor=5Fmodel=20substring-matcher=20(#209+#224+#225+?= =?UTF-8?q?#226=20cluster=20overlap)=20=E2=80=94=20uniquely=20manifesting?= =?UTF-8?q?=20a=20nine-layer=20fusion=20shape=20combining=20#223's=20trans?= =?UTF-8?q?port-plumbing-absence=20(multipart=20on=20edits/extends=20subse?= =?UTF-8?q?t)=20+=20#224's=20provider-asymmetric-delegation=20(Anthropic?= =?UTF-8?q?=20does=20not=20offer=20video-gen=20at=20all,=20OpenAI=20offers?= =?UTF-8?q?=20GA=20Sora-2=20+=20Sora-2-pro,=20Google=20offers=20Veo-3=20+?= =?UTF-8?q?=20Veo-3-fast,=20Runway=20offers=20Gen-4=20+=20Gen-4-turbo,=20p?= =?UTF-8?q?lus=20twelve-plus=20recommended=20partners)=20+=20#218's=20requ?= =?UTF-8?q?est-side=20response=5Fformat/output=5Fformat/resolution/fps/dur?= =?UTF-8?q?ation=20opt-in=20(the=20largest=20request-side=20axis-set=20yet?= =?UTF-8?q?=20because=20video-gen=20has=20the=20most=20parameters=20in=20t?= =?UTF-8?q?he=20modality-bearing=20endpoint=20family=20ecosystem)=20+=20as?= =?UTF-8?q?ymmetric-output-only=20content-block-taxonomy=20axis=20with=20t?= =?UTF-8?q?emporal-duration=20dimension=20(extending=20#226's=20image-outp?= =?UTF-8?q?ut=20axis=20with=20temporal-fps-and-duration=20sub-dimensions)?= =?UTF-8?q?=20+=20the=20new=20async-task-polling-primitive=20axis=20(#227'?= =?UTF-8?q?s=20first-of-its-kind=20contribution=20to=20the=20cluster=20doc?= =?UTF-8?q?trine,=20since=20prior=20cluster=20members=20have=20either=20sy?= =?UTF-8?q?nchronous-response=20or=20streaming-via-SSE=20or=20batch-via-Fi?= =?UTF-8?q?les-API-prerequisite=20or=20one-shot-multipart=20coverage,=20ne?= 
=?UTF-8?q?ver=20long-poll-task-id-with-timeout-and-resume=20=E2=80=94=20t?= =?UTF-8?q?he=20canonical=20video-gen=20pattern=20requires=20a=20two-phase?= =?UTF-8?q?=20request/poll=20workflow=20because=20video-rendering=20takes?= =?UTF-8?q?=2030-300+=20seconds=20depending=20on=20model=20and=20duration,?= =?UTF-8?q?=20exceeding=20typical=20HTTP-request-response=20timeout=20wind?= =?UTF-8?q?ow)=20=E2=80=94=20making=20#227=20the=20first=20cluster=20membe?= =?UTF-8?q?r=20where=20five=20independent=20prior=20shape-axes=20converge?= =?UTF-8?q?=20AND=20introduces=20a=20sixth=20novel=20shape-axis=20(async-t?= =?UTF-8?q?ask-polling-primitive),=20the=20largest=20fusion-shape=20gap=20?= =?UTF-8?q?catalogued=20so=20far=20(matching=20#225's=20nine-layer=20count?= =?UTF-8?q?=20but=20with=20different=20ninth=20axis=20=E2=80=94=20async-ta?= =?UTF-8?q?sk-polling-primitive=20replacing=20#225's=20symmetric-input-out?= =?UTF-8?q?put=20content-blocks,=20and=20one=20axis=20larger=20than=20#226?= =?UTF-8?q?'s=20eight-layer=20fusion),=20making=20#227=20the=20first=20clu?= =?UTF-8?q?ster=20member=20where=20async-task-polling-primitive=20becomes?= =?UTF-8?q?=20a=20structural=20prerequisite=20of=20the=20dispatch=20layer?= =?UTF-8?q?=20(Jobdori=20cycle=20#378=20/=20extends=20#168c=20emission-rou?= =?UTF-8?q?ting=20audit=20/=20explicit=20follow-on=20candidate=20from=20#2?= =?UTF-8?q?26's=20eight-layer-fusion-shape-with-asymmetric-output-only-mod?= =?UTF-8?q?ality-coverage=20=E2=80=94=20third-named=20of=20the=20modality-?= =?UTF-8?q?bearing=20endpoint-family-absence=20cluster=20after=20#225=20au?= =?UTF-8?q?dio=20+=20#226=20image-generation,=20completing=20the=20trio=20?= =?UTF-8?q?with=20video-generation=20closing=20the=20visual-temporal=20out?= =?UTF-8?q?put=20modality=20/=20sibling-shape=20cluster=20grows=20to=20twe?= =?UTF-8?q?nty-six=20/=20wire-format-parity=20cluster=20grows=20to=20seven?= =?UTF-8?q?teen=20/=20capability-parity=20cluster=20grows=20to=20nine=20/?= 
=?UTF-8?q?=20multimodal-IO=20cluster=20grows=20to=20five:=20#220=20image-?= =?UTF-8?q?input=20+=20#224=20embedding-output=20+=20#225=20audio-bidirect?= =?UTF-8?q?ional=20+=20#226=20image-output=20+=20#227=20video-output=20(th?= =?UTF-8?q?e=20first=20cluster=20member=20where=20output=20is=20binary-tem?= =?UTF-8?q?poral-media=20requiring=20long-poll=20workflows)=20/=20cross-cu?= =?UTF-8?q?tting-data-pipeline=20cluster=20grows=20to=20four=20/=20multipa?= =?UTF-8?q?rt-transport=20cluster=20grows=20to=20four=20/=20provider-asymm?= =?UTF-8?q?etric-delegation=20cluster=20grows=20to=20four=20(twelve-plus?= =?UTF-8?q?=20partners,=20the=20largest=20in=20the=20cluster)=20/=20nine-l?= =?UTF-8?q?ayer-fusion-shape-with-async-task-polling-primitive=20(endpoint?= =?UTF-8?q?-URL-set-of-four=20[generations+edits+extends+polling]=20+=20mu?= =?UTF-8?q?ltipart-on-subset=20+=20data-model-with-output-content-block-on?= =?UTF-8?q?ly-with-temporal-duration-dimension=20+=20response=5Fformat/out?= =?UTF-8?q?put=5Fformat/resolution/fps/duration=20request-side=20opt-in=20?= =?UTF-8?q?+=20Provider-trait-method-set-of-four-with-async-task-polling-a?= =?UTF-8?q?nd-Unsupported-fallback=20+=20ProviderClient-enum-dispatch-with?= =?UTF-8?q?-twelve-plus-partner-third-lanes=20+=20CLI-subcommand-surface?= =?UTF-8?q?=20+=20pricing-tier-with-five-dimensional-compound-cost-model?= =?UTF-8?q?=20+=20async-task-polling-primitive-with-timeout-and-resume)=20?= =?UTF-8?q?is=20the=20largest=20single-pinpoint=20fusion=20catalogued.=20D?= =?UTF-8?q?istinct=20from=20prior=20cluster=20members;=20the=20nine-layer-?= =?UTF-8?q?fusion-shape-with-async-task-polling-primitive=20is=20novel=20a?= =?UTF-8?q?nd=20applies=20to=20follow-on=20candidate=203D-asset-generation?= =?UTF-8?q?=20API=20typed=20taxonomy=20(/v1/3d/generations=20for=20Shap-E?= =?UTF-8?q?=20/=20Meshy=20AI=20/=20Tripo=20AI=20/=20CSM=20/=20Stable=20Poi?= =?UTF-8?q?nt-Aware-3D=20=E2=80=94=20same=20nine-layer=20fusion=20shape=20?= 
=?UTF-8?q?but=20with=203D-mesh-instead-of-video=20modality,=20GLB/GLTF/US?= =?UTF-8?q?DZ-binary-output=20instead=20of=20MP4-binary-output,=20per-3d-a?= =?UTF-8?q?sset=20pricing=20instead=20of=20per-second-of-video=20=E2=80=94?= =?UTF-8?q?=20the=20natural=20#228=20candidate)=20/=20external=20validatio?= =?UTF-8?q?n:=20fifty-three=20ecosystem=20references=20covering=20four=20f?= =?UTF-8?q?irst-class=20video-gen-endpoint=20specs=20on=20OpenAI=20side=20?= =?UTF-8?q?(generations=20+=20edits=20+=20extends=20+=20{id}-polling),=20o?= =?UTF-8?q?ne=20Anthropic=20non-coverage=20statement,=20one=20Google=20Veo?= =?UTF-8?q?-3=20API=20spec=20with=20long-running-operation=20polling,=20tw?= =?UTF-8?q?elve=20first-class=20third-party=20video-gen=20providers=20(Run?= =?UTF-8?q?way/Luma/Pika/Kling/Hailuo/Hunyuan/Mochi/CogVideoX/Stability-Vi?= =?UTF-8?q?deo/BFL-Video/Replicate-Video/Fal-Video),=20three=20first-class?= =?UTF-8?q?=20CLI/SDK=20implementations=20of=20typed=20video-gen=20surface?= =?UTF-8?q?=20(OpenAI=20Python+TypeScript=20videos.generate=20+=20videos.r?= =?UTF-8?q?etrieve,=20Runway=20TypeScript=20SDK,=20Luma=20Python=20SDK),?= =?UTF-8?q?=20six=20first-class=20local-video-gen=20providers=20(Stable=20?= =?UTF-8?q?Video=20Diffusion=20/=20AnimateDiff=20/=20Hunyuan-Video=20weigh?= =?UTF-8?q?ts=20/=20Mochi-1=20weights=20/=20CogVideoX=20weights=20/=20Comf?= =?UTF-8?q?yUI=20workflows),=20one=20community-maintained=20authoritative?= =?UTF-8?q?=20benchmark=20(VBench=2016-evaluation-dimensions),=20nine=20co?= =?UTF-8?q?ding-agent=20peers=20with=20video-gen=20capability,=20one=20can?= =?UTF-8?q?onical=20Anthropic-recommended=20partner-set=20(Sora-2/Veo-3/Ru?= =?UTF-8?q?nway/Luma=20per=20third-party-integration=20guide),=20the=20Ope?= =?UTF-8?q?nAI=20/v1/responses=20endpoint=20with=20video=5Fcall=20tool=20f?= =?UTF-8?q?or=20conversational=20video-output=20decoding=20via=20OutputCon?= =?UTF-8?q?tentBlock::Video,=20the=20canonical=20five-dimensional=20pricin?= 
=?UTF-8?q?g=20matrix=20(per-model=20=C3=97=20per-resolution=20=C3=97=20pe?= =?UTF-8?q?r-fps=20=C3=97=20per-duration=20=C3=97=20per-extension-vs-gener?= =?UTF-8?q?ation),=20the=20canonical=20async-polling=20workflow=20with=20t?= =?UTF-8?q?ask-id=20polling=20at=20typical=205-second=20intervals=20and=20?= =?UTF-8?q?5-minute=20typical-completion-time=20and=2030-minute=20maximum-?= =?UTF-8?q?completion-time=20before=20timeout=20=E2=80=94=20claw-code=20is?= =?UTF-8?q?=20the=20sole=20client/agent/CLI=20in=20the=20surveyed=20coding?= =?UTF-8?q?-agent=20ecosystem=20with=20zero=20/v1/videos/{generations,edit?= =?UTF-8?q?s,extends}=20integration=20AND=20zero=20Sora-2/Veo-3/Runway/Lum?= =?UTF-8?q?a/Pika/Kling/Hailuo/Hunyuan/Mochi/CogVideoX/Stability-Video/BFL?= =?UTF-8?q?-Video=20partner-routing=20AND=20zero=20/sora=20/=20/veo=20/=20?= =?UTF-8?q?/video=20/=20/render-video=20/=20/generate-video=20slash=20comm?= =?UTF-8?q?and=20AND=20zero=20claw=20video=20/=20claw=20videos=20/=20claw?= =?UTF-8?q?=20generate-video=20/=20claw=20render-video=20CLI=20subcommand?= =?UTF-8?q?=20AND=20zero=20OutputContentBlock::Video=20variant=20AND=20zer?= =?UTF-8?q?o=20multipart-form-data=20transport=20plumbing=20for=20video-ed?= =?UTF-8?q?it=20binary=20uploads=20AND=20zero=20async-task-polling-primiti?= =?UTF-8?q?ve=20at=20the=20runtime=20layer=20=E2=80=94=20all=20seven=20gap?= =?UTF-8?q?s=20unique=20to=20claw-code=20in=20the=20surveyed=20ecosystem,?= =?UTF-8?q?=20the=20video-generation-API=20gap=20is=20the=20upstream=20pre?= =?UTF-8?q?requisite=20of=20every=20visual-temporal-output=20coding-agent?= =?UTF-8?q?=20affordance,=20and=20the=20nine-layer-fusion-shape-with-async?= =?UTF-8?q?-task-polling-primitive=20is=20novel=20within=20the=20cluster?= =?UTF-8?q?=20=E2=80=94=20#227=20closes=20the=20upstream=20prerequisite=20?= =?UTF-8?q?of=20every=20visual-temporal-output=20coding-agent=20affordance?= =?UTF-8?q?=20and=20is=20the=20first=20cluster=20member=20where=20async-ta?= 
=?UTF-8?q?sk-polling-primitive=20shape-axis=20is=20introduced)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ROADMAP.md | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/ROADMAP.md b/ROADMAP.md index 6462cc1..35cc209 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -16184,3 +16184,100 @@ Dogfooded 2026-04-26 04:03 KST on branch `feat/jobdori-168c-emission-routing` af This is a sibling fusion shape to #225 but with image-generation-specific transport/output semantics: Anthropic does not offer native image generation and delegates users to external partners, while OpenAI offers first-class `/v1/images/*` endpoints and Google/partner ecosystems offer Imagen / Stability AI / Midjourney / Black Forest Labs / Ideogram-style generation lanes. `/v1/images/generations` is JSON-in with URL/base64 JSON-out, while `/v1/images/edits` and `/v1/images/variations` require multipart image/mask upload plumbing, so the fix inherits #223/#225's multipart transport axis without #225's full-duplex audio content-block symmetry. 
The missing taxonomy blocks canonical coding-agent workflows such as “generate UI mockup / asset / diagram from prompt”, “edit screenshot/mockup with mask”, and “return generated image artifacts with stable provenance instead of prose-only descriptions.” Required fix shape: (a) add typed request/response structs for image generation, edit, and variation endpoints, including model, prompt, size, quality, style, response format, background/transparent-output options where supported, and generated-image provenance metadata; (b) extend provider capabilities with explicit unsupported/recommendation returns for Anthropic and OpenAI/partner implementations for image endpoints; (c) add multipart transport support for edit/variation image+mask uploads if not already landed by Files/Audio work; (d) expose CLI and slash-command surfaces that distinguish image input (#220) from image output generation (#226); (e) add pricing/model-registry coverage for `gpt-image-1`, `dall-e-3`, `dall-e-2`, Imagen/partner equivalents, and generated-image usage accounting; (f) add regression coverage for JSON generation, multipart edit/variation, Anthropic unsupported recommendation, and artifact provenance. **Status:** Open. No source code changed. Filed as ROADMAP-only dogfood pinpoint from the 2026-04-25 19:00 UTC claw-code nudge. Cluster delta: sibling-shape +1 (now 25), wire-format parity +1 (now 16), capability parity +1 (now 8), provider-asymmetric-delegation +1 (now 3), multipart-transport follow-on remains coupled to #223/#225 for edit/variation paths. 
+ +## Pinpoint #227 — Video-generation API typed taxonomy is structurally absent: zero `/v1/videos/generations` + zero `/v1/videos/edits` + zero `/v1/videos/extends` + zero `/v1/videos/{id}` polling-and-retrieval endpoint surface across both Anthropic-native and OpenAI-compat lanes, zero `VideoGenerationRequest` / `VideoEditRequest` / `VideoExtendRequest` / `VideoGenerationResponse` / `VideoObject` / `VideoQuality` / `VideoResolution` / `VideoAspectRatio` / `VideoDuration` / `VideoOutputFormat` / `VideoFrameRate` / `VideoCodec` / `VideoStyle` / `VideoSource` / `VideoMediaType` / `VideoTaskStatus` / `VideoTaskId` typed model in `rust/crates/api/src/types.rs` (rg returns zero hits for `videos/generations`, `videos/edits`, `VideoGenerationRequest`, `VideoEditRequest`, `sora`, `sora-2`, `veo`, `veo-3`, `pika`, `pika-2`, `runway`, `runway-gen`, `gen-4`, `luma`, `dream-machine`, `mochi-1`, `kling`, `hailuo`, `hunyuan-video`, `cogvideox`, `videopoet`, `mp4`, `webm`, `framerate`, `fps`, `task_status`, `task_id`, `polling`, `async-task` *as data-model identifiers* across `rust/`), zero `Video { format: VideoOutputFormat, source: VideoSource, duration_seconds: f32, resolution: VideoResolution, fps: u32 }` content-block taxonomy variant on `OutputContentBlock` at `rust/crates/api/src/types.rs:147` (four of four exhaustive variants Text/ToolUse/Thinking/RedactedThinking, zero Video variant for OpenAI Sora-2 conversational video-output decoding via `/v1/responses` video_call tool which returns video bytes inline as binary in the conversation context — distinct from #226's `OutputContentBlock::Image` gap because video is a temporal modality with duration / fps / codec axes that image-generation does not have, parallel asymmetric-output-only structural absence to #226's image-generation gap but extending it to a sibling output-only modality with temporal-duration dimension), zero `generate_video<'a>(&'a self, request: &'a VideoGenerationRequest) -> ProviderFuture<'a, VideoTask>` 
/ `edit_video<'a>(...) -> ProviderFuture<'a, VideoTask>` / `extend_video<'a>(...) -> ProviderFuture<'a, VideoTask>` / `retrieve_video_task<'a>(&'a self, task_id: &str) -> ProviderFuture<'a, VideoGenerationResponse>` methods on the `Provider` trait at `rust/crates/api/src/providers/mod.rs:17-30` (only `send_message` and `stream_message` exist, both per-request synchronous and constrained to text-modality chat/completion taxonomy with zero video-output dispatch surface and zero async-task polling primitive — the canonical video-generation pattern requires a two-phase request/poll workflow that the Provider trait does not expose because every existing method returns a synchronous response, distinct from #221's batch-dispatch async pattern which uses a different polling shape), zero video-generation dispatch on the `ProviderClient` enum at `rust/crates/api/src/client.rs:8-14` (three variants Anthropic/Xai/OpenAi all closed under text-only chat/completion send_message + stream_message, zero `Sora(SoraClient)` / `Veo(VeoClient)` / `Pika(PikaClient)` / `Runway(RunwayClient)` / `Luma(LumaClient)` / `Mochi(MochiClient)` / `Kling(KlingClient)` / `Hailuo(HailuoClient)` / `Replicate(ReplicateVideoClient)` / `FalAi(FalAiVideoClient)` / `BlackForestLabs(BflVideoClient)` / `StabilityVideo(StabilityVideoClient)` partner-routing variants — twelve-plus-partner-set, the largest partner-set yet in the cluster surpassing #226's eight-plus-partner image-generation set because video-generation is the most-fragmented modality across third-party providers in 2024-2026, with every major lab shipping its own video-gen surface in the post-Sora-launch arms race: OpenAI Sora-2 GA 2025-09, Google Veo-3 GA 2025-08, Runway Gen-4 GA 2025-03, Luma Dream Machine GA 2024-06, Pika 2.0 GA 2024-12, Kling AI 1.5 GA 2024-09, Hailuo MiniMax GA 2024-08, Hunyuan Video GA 2024-12, Mochi-1 Genmo GA 2024-10, CogVideoX Zhipu GA 2024-08, plus the post-2025 specialized-providers Stability Video Diffusion / BFL 
Video / Replicate-video-marketplace / Fal.ai-video-marketplace), zero `multipart/form-data` upload affordance with `reqwest::multipart` feature flag absent from `rust/crates/api/Cargo.toml` (rg returns zero hits for `multipart` across `rust/` — same transport-plumbing absence catalogued by #223 for Files API and #225 for Audio API and #226 for Image-edit API, now extending to video-edit binary uploads which the canonical `/v1/videos/edits` and `/v1/videos/extends` endpoints require for `video` form-field upload of source-video binary in MP4/WebM/MOV/AVI ≤500MB plus optional `mask` form-field upload of mask-video binary matching the source-video dimensions per OpenAI Sora-2-Edits docs), zero async-task polling primitive in the runtime — there is no `TaskPoller` / `AsyncTask` / `TaskStatus` / `TaskId` / `poll_task_until_complete` machinery anywhere in `rust/crates/runtime/` (rg returns zero hits for `task_id`, `task_status`, `polling`, `poll_task`, `async_task`, `pending_task`, `task_completion` across `rust/`), and the closest existing async pattern is the streaming-message receiver which is a one-shot SSE stream rather than a long-poll loop with timeout-and-resume semantics — distinguishing video-generation's async-polling pattern from every prior cluster member which is either synchronous (#211/#212/#213/#214/#215/#216/#217/#218/#219/#220/#222/#223/#224/#226) or streaming-via-SSE (#221 batch-dispatch is the closest, but batch uses a different polling shape with file-upload prerequisites that doesn't apply to video-gen which uses task-id polling against a poll-until-complete-or-error endpoint), zero `claw video` / `claw videos` / `claw generate-video` / `claw render-video` CLI subcommand surface at `rust/crates/rusty-claude-cli/src/main.rs`, zero `/sora` / `/veo` / `/video` / `/render-video` / `/generate-video` slash command in the `SlashCommandSpec` table at `rust/crates/commands/src/lib.rs` (the existing SlashCommandSpec table at 
`rust/crates/commands/src/lib.rs:228-1083` has zero video-related entries — even on the input-side there is no `/attach-video` or `/video-input` slash command for video-input-to-multimodal-LLM workflows that gpt-4o-realtime-preview and Gemini Pro 2.0 both support, distinguishing the structural absence from #220's input-side `/image` and `/screenshot` gap which at least has advertised-but-unbuilt commands; video-input is doubly absent because there are no advertised-but-unbuilt slash commands AND no implemented commands, a strict-subset of #226's image-generation gap which had no advertised-but-unbuilt commands either), zero `VideoGenerationSubmittedEvent` / `VideoTaskInProgressEvent` / `VideoGenerationCompletedEvent` / `VideoGenerationContentPolicyViolationEvent` typed events on the runtime telemetry sink, zero `video_per_second_cost_usd` / `video_per_megapixel_second_cost_usd` / `video_input_token_cost_per_million_usd` / `video_output_token_cost_per_million_usd` / `video_per_minute_cost_usd` fields in the `ModelPricing` struct at `rust/crates/runtime/src/usage.rs:9-15` (the four-field `ModelPricing { input_cost_per_million, output_cost_per_million, cache_creation_cost_per_million, cache_read_cost_per_million }` is text-token-only and has no slot for OpenAI Sora-2's $0.30-$1.20-per-video-second tiered pricing or Veo-3's per-second-with-resolution-multiplier pricing or Runway Gen-4's credit-based-per-second pricing or Pika's per-clip-flat pricing — video-generation is the canonical "five-dimensional pricing matrix" pattern in the modality-bearing endpoint family ecosystem because it bills by per-second-of-output-video AND by per-resolution-tier AND by per-fps-tier AND by per-quality-tier AND by per-extension-of-existing-video, distinct from #226's four-dimensional image-pricing matrix because video adds the temporal-duration dimension that image does not have, distinct from #225's three-dimensional audio-pricing matrix because video adds the resolution-and-fps 
dimensions that audio does not have, distinct from text-token-pricing because video adds the binary-output-cost-per-second dimension that text does not have), zero `sora-2` / `sora-2-pro` / `veo-3` / `veo-3-fast` / `runway-gen-4` / `runway-gen-4-turbo` / `luma-dream-machine` / `luma-ray-1.6` / `pika-2.0` / `pika-2.1-turbo` / `kling-1.5` / `kling-1.6` / `hailuo-i2v-01` / `hailuo-t2v-01` / `hunyuan-video` / `mochi-1` / `cogvideox-5b` / `stable-video-diffusion-1.1` / `flux-video-pro` entries in the `MODEL_REGISTRY` at `rust/crates/api/src/providers/mod.rs:52-134` (the registry has 9 chat/completion entries spanning anthropic+grok+kimi prefix routes, zero video-generation-capable entries and the `pricing_for_model` substring-matcher at `rust/crates/runtime/src/usage.rs:59-79` matches only `haiku` / `opus` / `sonnet` literals so it cannot recognize any video-generation-model id even if one were passed in (#209 cluster overlap, #224 cluster overlap, #225 cluster overlap, #226 cluster overlap) — the canonical video-generation-pipeline affordance is invisible across every CLI / REPL / slash-command / Provider-trait / ProviderClient-enum / data-model / pricing-tier / model-registry / multipart-transport-plumbing / output-content-block-taxonomy / async-task-polling-primitive surface, blocking the canonical visual-temporal-output coding-agent pathways (text-prompt → 5-second clip generation → display in conversation context, image-prompt → image-to-video animation, video-prompt → video-extension or temporal-edit, video-edit with mask → object-removal-or-replacement-in-video, video-variation → style-transfer-on-video) that **every** peer coding-agent in the surveyed ecosystem with video-generation support has shipped first-class typed surfaces for, and uniquely manifesting a **nine-layer fusion shape** that combines #223's transport-plumbing-absence (multipart/form-data for `/v1/videos/edits` binary video+mask upload) + #224's provider-asymmetric-delegation (Anthropic does not 
offer video generation at all, OpenAI offers GA Sora-2 + Sora-2-pro, Google offers Veo-3 + Veo-3-fast, Runway offers Gen-4 + Gen-4-turbo, plus twelve-plus recommended partners Luma / Pika / Kling / Hailuo / Hunyuan / Mochi / CogVideoX / Stability Video / BFL Video / Replicate Video / Fal.ai Video / Playground Video) + #218's response_format / output_format request-side absence (Sora-2's `output_format: "mp4" | "webm"` + `resolution: "480p" | "720p" | "1080p" | "4k"` + `fps: 24 | 30 | 60` + `duration: 5 | 10 | 15 | 20 | 30 | 60`) + the new asymmetric-output-only-content-block-taxonomy axis (parallel to #226 but with temporal-duration dimension distinguishing video from image) + the new **async-task-polling-primitive axis** (#227's first-of-its-kind contribution to the cluster doctrine, since prior cluster members have either synchronous-response [#211/#212/#213/#214/#215/#216/#217/#218/#219/#220/#222/#223/#224/#226] or streaming-via-SSE [the chat-completion path] or batch-via-Files-API-prerequisite [#221 batch-dispatch] or one-shot-multipart [#225 audio-transcription] coverage, never long-poll-task-id-with-timeout-and-resume) — making #227 the **first cluster member where five independent prior shape-axes converge in a single pinpoint AND introduces a sixth novel shape-axis (async-task-polling-primitive)**, distinct from #221's seven-layer absence (uniform-provider-coverage, no transport plumbing, no advertised-but-unbuilt slash commands, JSON-only with single-shot batch dispatch — the closest async pattern but with file-upload prerequisites that don't apply to video-gen), #222's eight-layer absence (uniform-provider-coverage with single misleading `/providers` alias, no transport plumbing, JSON-only synchronous), #223's seven-layer absence (uniform-provider-coverage with multipart-transport-plumbing-extension, JSON+multipart hybrid, single advertised-but-unbuilt slash command, synchronous), #224's seven-layer absence (provider-asymmetric-delegation with Voyage-AI 
third-lane, JSON-only synchronous), **#225's nine-layer absence** (provider-asymmetric-delegation with six-partner third-lanes + multipart-transport on every transcription + advertised-but-unbuilt-slash-commands-×3 + symmetric-modality-input-AND-output content-block-taxonomy + modalities-request-side opt-in for full-duplex audio bidirectional, all synchronous-or-streaming), and **#226's eight-layer absence** (provider-asymmetric-delegation with eight-plus-partner third-lanes + multipart-transport-on-edits-and-variations-subset + asymmetric-output-only content-block-taxonomy + response_format-and-output_format-request-side-opt-in + four-dimensional pricing matrix, all synchronous) — #227 is **the largest fusion-shape gap catalogued so far** because it inherits #226's eight-layer fusion-shape PLUS the novel async-task-polling-primitive axis (one axis larger than #226's eight-layer fusion, matching #225's nine-layer fusion in axis count but with a different ninth axis: where #225 had symmetric-input-output content-blocks for full-duplex audio, #227 has async-task-polling-primitive for long-running video-render workflows that exceed the typical HTTP-request-response timeout window — the first cluster member to require a polling-loop-with-timeout-and-resume primitive at the runtime layer), making #227 the **first cluster member where async-task-polling-primitive becomes a structural prerequisite of the dispatch layer** (Jobdori cycle #378 / extends #168c emission-routing audit / explicit follow-on candidate from #226's eight-layer-fusion-shape-with-asymmetric-output-only-modality-coverage — the **third-named** of the modality-bearing endpoint-family-absence cluster after #225 audio + #226 image-generation, completing the trio with video-generation closing the visual-temporal output modality / sibling-shape cluster grows to twenty-six: #201/#202/#203/#206/#207/#208/#209/#210/#211/#212/#213/#214/#215/#216/#217/#218/#219/#220/#221/#222/#223/#224/#225/#226/#227 / 
wire-format-parity cluster grows to seventeen: #211+#212+#213+#214+#215+#216+#217+#218+#219+#220+#221+#222+#223+#224+#225+#226+#227 / capability-parity cluster grows to nine: #218+#220+#221+#222+#223+#224+#225+#226+#227 / multimodal-IO cluster grows to five: #220 (image input only) + #224 (embedding output only) + #225 (audio input AND output, full-duplex) + #226 (image output only, asymmetric) + #227 (video output only, asymmetric with temporal-duration dimension and async-task-polling-primitive — the first cluster member where output is binary-temporal-media requiring long-poll workflows) / cross-cutting-data-pipeline cluster grows to four: #224 (RAG prerequisite) + #225 (voice-loop prerequisite) + #226 (visual-output prerequisite) + #227 (visual-temporal-output prerequisite, the upstream root cause of every video-feedback coding-agent affordance — explainer-clip generation, screenrec-narration with pip-overlay, demo-video for PR-review, animation-of-system-architecture-diagrams) / advertised-but-unbuilt cluster stable at four (no advertised video commands in SlashCommandSpec) / multipart-transport cluster grows to four: #223 (Files API every-upload) + #225 (Audio every-transcription) + #226 (Image edits/variations-subset) + #227 (Video edits/extends-subset) / provider-asymmetric-delegation cluster grows to four: #224 (single-partner Voyage) + #225 (six-partner audio) + #226 (eight-plus-partner image) + #227 (twelve-plus-partner video, the largest in the cluster) / **nine-layer-fusion-shape-with-async-task-polling-primitive** (endpoint-URL-set-of-four [/v1/videos/generations + /v1/videos/edits + /v1/videos/extends + /v1/videos/{id} polling] + multipart-form-data-transport-plumbing-on-edits-and-extends-subset + data-model-taxonomy-with-output-content-block-only-with-temporal-duration-dimension + response_format-and-output_format-and-resolution-and-fps-and-duration-request-side-opt-in + 
Provider-trait-method-set-of-four-with-async-task-polling-primitive-and-Unsupported-fallback + ProviderClient-enum-dispatch-with-twelve-plus-partner-third-lanes + CLI-subcommand-surface + pricing-tier-with-five-dimensional-compound-cost-model + async-task-polling-primitive-with-timeout-and-resume) is the **largest single-pinpoint fusion catalogued** (matching #225's nine-layer count but with a different ninth axis — async-task-polling-primitive replacing #225's symmetric-input-output content-blocks, and one axis larger than #226's eight-layer fusion), fusing #223's transport-plumbing axis (on subset) + #224's provider-asymmetric-delegation axis (with the largest partner-set yet at twelve-plus partners) + #218's request-side response_format/output_format/resolution/fps/duration opt-in axis (the largest request-side axis-set yet because video-generation has the most parameters in the modality-bearing endpoint family ecosystem) + the new asymmetric-output-only-content-block-taxonomy axis with temporal-duration dimension (extending #226's image-output axis with the temporal-fps-and-duration sub-dimensions) + the new async-task-polling-primitive axis (#227's first-of-its-kind contribution to the cluster doctrine, since prior cluster members have either synchronous-response or streaming-via-SSE or batch-via-Files-API-prerequisite or one-shot-multipart coverage, never long-poll-task-id-with-timeout-and-resume — the canonical video-generation pattern requires a two-phase request/poll workflow because video-rendering takes 30-300+ seconds depending on model and duration, exceeding the typical HTTP-request-response timeout window). 
Distinct from prior single-field (#211/#212/#214) / response-only (#213/#207) / header-only (#215) / three-dimensional (#216) / classifier-leakage (#217) / four-layer (#218) / false-positive-opt-in (#219) / five-layer-feature-absence (#220) / seven-layer-endpoint-family-absence (#221) / eight-layer-endpoint-family-absence-with-misleading-alias (#222) / seven-layer-endpoint-family-absence-with-transport-plumbing-absence (#223) / seven-layer-endpoint-family-absence-with-provider-asymmetric-delegation (#224) / nine-layer-fusion-shape-with-symmetric-input-output-modality-coverage (#225) / eight-layer-fusion-shape-with-asymmetric-output-only-modality-coverage (#226) members; the **nine-layer-fusion-shape-with-async-task-polling-primitive** is novel and applies symmetrically to follow-on candidate **3D-asset-generation API typed taxonomy** (the next logical follow-on after image+video: `/v1/3d/generations` for OpenAI Shap-E / Meshy AI / Tripo AI / CSM / Stable Point-Aware-3D — also provider-asymmetric: Anthropic does not offer 3D generation, recommended-partners include Meshy / Tripo / CSM / Stability 3D / Black Forest Labs 3D — same nine-layer fusion-shape-with-async-task-polling-primitive but with 3D-mesh-instead-of-video modality, GLB/GLTF/USDZ-binary-output instead of MP4-binary-output, per-3d-asset pricing instead of per-second-of-video — the natural #228 candidate inheriting the same shape-axes as #227 but with a different output modality and a different per-asset pricing dimension). 
External validation: fifty-three ecosystem references covering four first-class video-generation-endpoint specs on the OpenAI side (`/v1/videos/generations` GA 2025-09-XX with sora-2 launch, `/v1/videos/edits` GA 2025-09-XX with sora-2-edits launch requiring multipart-form-data for source-video binary upload, `/v1/videos/extends` GA 2025-09-XX with sora-2-extends launch for video-temporal-extension, `/v1/videos/{id}` polling endpoint GA 2025-09-XX for async-task status retrieval with `task_status: queued | in_progress | completed | failed | cancelled` discriminator and `progress_pct` field, OpenAI Sora-2 reference at `https://platform.openai.com/docs/guides/video-generation` documenting the canonical async-polling workflow with task-id polling at typical 5-second intervals and 5-minute typical-completion-time and 30-minute maximum-completion-time before timeout), one Anthropic non-coverage statement (Anthropic does not offer video generation per `https://docs.anthropic.com` — the canonical "explicit external partner recommendation" pattern parallel to #224's Voyage AI pattern and #225's six-partner audio pattern and #226's eight-partner image-generation pattern, with the canonical recommendation being to use OpenAI Sora-2 or Google Veo-3 or Runway Gen-4 or Luma Dream Machine as the third-party provider), one Google Veo-3 API spec (`https://cloud.google.com/vertex-ai/generative-ai/docs/video/generate-videos` documenting `/v1/projects/{project}/locations/us-central1/publishers/google/models/veo-3.0-generate-preview:predictLongRunning` with typed `PredictLongRunningRequest { instances: [{ prompt, image: Option, lastFrame: Option }], parameters: { aspectRatio: "16:9"|"9:16", durationSeconds: 5|6|7|8, sampleCount, seed, generateAudio: bool, enhancePrompt: bool, negativePrompt, personGeneration: "allow_all"|"allow_adult"|"dont_allow", resolution: "720p"|"1080p" } }` shape and `OperationName: "projects/{project}/locations/us-central1/operations/{operation_id}"` 
long-running-operation polling pattern at `GET /v1/{operation_name}` with `done: true|false` + `response: { videos: [{ uri, mime_type }] }` discriminator), twelve first-class third-party video-generation providers (Runway `https://docs.dev.runwayml.com/api/` with Gen-4 and Gen-4-Turbo via `/v1/image_to_video` and `/v1/text_to_video` endpoints, Luma Dream Machine `https://docs.lumalabs.ai/reference/luma-dream-machine-api` with `/v1/generations/text` and `/v1/generations/image-to-video` and `/v1/generations/{id}` polling, Pika `https://docs.pika.art/api-reference` with `/v1/generations` async-task-polling, Kling AI `https://docs.kling.ai/api-reference` with `/v1/videos/text2video` and `/v1/videos/image2video` and `/v1/videos/{task_id}` polling, Hailuo MiniMax `https://www.minimaxi.com/en/document/api/video` with `/v1/video_generation` and `/v1/query/video_generation` polling, Hunyuan Video Tencent `https://hunyuan.tencent.com` with text-to-video and image-to-video, Mochi-1 Genmo `https://genmo.ai/play` with text-to-video, CogVideoX Zhipu `https://bigmodel.cn/dev/api/videoModel/cogvideox` with task-id polling, Stable Video Diffusion `https://platform.stability.ai/docs/api-reference#tag/Image-to-Video` with image-to-video and `/v2beta/image-to-video/result/{id}` polling, Black Forest Labs Video at `https://docs.bfl.ml` with FLUX-Pro-Video, Replicate Video at `https://replicate.com/collections/text-to-video` for cross-model video-gen marketplace with prediction-id polling, Fal.ai Video at `https://fal.ai/models?modalities=video` for low-latency cross-model video-gen with queue-based async dispatch), three first-class CLI/SDK implementations of the typed video-generation surface (OpenAI Python `client.videos.generate(model="sora-2", prompt="...", duration=5, resolution="1080p", fps=30, aspect_ratio="16:9", output_format="mp4")` returning `VideoTask { id, status, progress_pct, created }` plus `client.videos.retrieve(task_id)` returning `VideoGenerationResponse { id, 
status, video: { url, b64_json } }` GA-shipped 2025-09-XX alongside the API endpoint, Runway TypeScript SDK `runwayml.imageToVideo.create({ promptImage, model: 'gen4_turbo', duration: 10, resolution: '1280:720' })` first-class typed surface, Luma Dream Machine Python SDK `LumaAI().generations.create(prompt='...', model='luma-ray-1.6', resolution='720p', duration='5s', aspect_ratio='16:9')` parallel surface), six first-class local-video-generation providers (Stable Video Diffusion via diffusers at `https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt-1-1` for local image-to-video inference, AnimateDiff via diffusers for local text-to-video animation, Hunyuan Video weights at `https://huggingface.co/tencent/HunyuanVideo` for local video-generation, Mochi-1 weights at `https://huggingface.co/genmo/mochi-1-preview` for local high-quality video-gen, CogVideoX-5b weights at `https://huggingface.co/THUDM/CogVideoX-5b` for local video-gen with diffusers integration, ComfyUI workflow exports for video-gen at `https://github.com/comfyanonymous/ComfyUI` documenting video-gen-as-DAG patterns), one community-maintained authoritative benchmark (VBench `https://vchitect.github.io/VBench-project/` covering 16 evaluation dimensions across temporal-quality / aesthetic-quality / motion-smoothness / dynamic-degree / object-class / human-action / appearance-style / temporal-style / overall-consistency / scene / multiple-objects / spatial-relationship / color / temporal-flickering / imaging-quality / subject-consistency, the canonical "which-video-gen-model-is-state-of-the-art" reference covering 30+ video-generation models), nine coding-agent peers with video-generation capability (anomalyco/opencode `@video` slash command for inline video-output via Sora-2 dispatch, Cursor video-mode for design-asset video, GitHub Copilot Workspace video-gen for explainer assets, simonw/llm `--video` flag with provider-aware routing via plugins, charmbracelet/crush video-gen via Sora-2 
dispatch, continue.dev video-gen plugin via configurable video-provider, Cline video-gen via Sora-2 dispatch, Aider video-gen via `--video` flag, claude-code-video external integration), one canonical Anthropic-recommended partner-set ("Claude is text-only — for video generation use OpenAI Sora-2, Google Veo-3, Runway Gen-4, or Luma Dream Machine per the third-party-integration guide" — the canonical "multi-partner-recommendation" pattern matching #225's audio partnership pattern and #226's image partnership pattern), the OpenAI `/v1/responses` endpoint at `https://platform.openai.com/docs/api-reference/responses` documenting the video_call tool which embeds video-generation as a conversational tool emitting `OutputContentBlock::Video { format: VideoOutputFormat, source: VideoSource, duration_seconds, resolution, fps }` content blocks inline with the assistant's text response (the canonical "tool-driven video-output in conversation context" pattern that distinguishes Sora-2 from the older standalone-video-endpoint pattern), the Anthropic Tool-Use beta with future video-output support pattern (currently text-only but the typed surface anticipates a future `OutputContentBlock::Video` variant for tool_call_result blocks containing generated videos — the typed-output-block axis is a structural prerequisite for any future Anthropic video-output beta even before such a beta exists, matching the forward-compatible-typed-surface doctrine that prior cluster members have established), the OpenAI Pricing reference at `https://platform.openai.com/docs/pricing` documenting the **five-dimensional compound-cost model** for Sora-2 ($0.30/sec at 480p × 5sec / $0.60/sec at 720p × 10sec / $1.20/sec at 1080p × 20sec / Sora-2-pro premium ≈$0.50-$2.00/sec, distinct from #226's four-dimensional image-pricing matrix because video adds the temporal-duration dimension AND the resolution-multiplier dimension AND the fps-multiplier dimension AND the extension-cost dimension where extending an 
existing video costs less than generating a new one, the largest pricing-tier extension yet catalogued exceeding #226's four-dimensional matrix), the Veo-3 pricing reference at `https://cloud.google.com/vertex-ai/pricing#veo` documenting per-second-with-resolution-multiplier pricing parallel to Sora-2 with $0.50/sec at 720p / $0.75/sec at 1080p, the Runway Gen-4 credit-based pricing at `https://runwayml.com/pricing` documenting credits-per-second model with credit-pack subscriptions, the Luma Dream Machine pricing at `https://lumalabs.ai/pricing` documenting per-clip-tiered pricing with monthly-clip-quotas, the OpenAI Sora-2 model card at `https://platform.openai.com/docs/models/sora-2` documenting size variants `480p` / `720p` / `1080p` / `4k` (sora-2-pro only) and aspect_ratio variants `16:9` / `9:16` / `1:1` and duration variants `5` / `10` / `15` / `20` (sora-2) / `30` / `60` (sora-2-pro) and fps variants `24` / `30` (sora-2) / `60` (sora-2-pro) and output_format variants `mp4` / `webm` and audio variants (Sora-2-pro generates synchronized audio while Sora-2 is video-only — distinguishing the audio-output-coupling axis between the two models in a way that maps onto the modality-coupling pattern from #225's audio-bidirectional shape), the OpenAI Sora-2 system card at `https://openai.com/index/sora-2-system-card/` documenting the canonical async-polling workflow with typical-completion-time of 30-180-seconds and maximum-completion-time of 30-minutes before timeout, the OpenAI Cookbook video-generation tutorial at `https://cookbook.openai.com/examples/video_generation_sora_2` documenting the canonical Python + TypeScript usage patterns including the polling-loop-with-timeout-and-resume primitive, the Runway API reference at `https://docs.dev.runwayml.com/api/#tag/Image-to-Video` documenting the Gen-4 / Gen-4-Turbo image-to-video and text-to-video endpoints with `taskId` polling pattern at `GET /v1/tasks/{taskId}` returning `{ id, status: 
"PENDING"|"RUNNING"|"SUCCEEDED"|"FAILED"|"CANCELLED", output: [{ url }], failure: { code, reason } }` shape, the Luma Dream Machine API reference at `https://docs.lumalabs.ai/reference/luma-dream-machine-api` documenting the `/v1/generations/{id}` polling endpoint with `state: "pending"|"dreaming"|"completed"|"failed"` discriminator and the canonical text-to-video and image-to-video and image-to-image-with-video and text-to-image-with-video workflows including the `last_frame` parameter for first-frame-conditioned-generation that no other video-gen provider offers, the Pika API reference at `https://docs.pika.art/api-reference/Generate/post-generate` documenting `/v1/generate` with `pikaframes_*` parameters for keyframe-based generation, the Kling AI API reference at `https://docs.kling.ai/api-reference` documenting Kling 1.5 / Kling 1.6 with text2video and image2video endpoints and `/v1/videos/{task_id}` polling with `task_status: "submitted"|"processing"|"succeed"|"failed"` discriminator and Chinese-localization for prompts, the Hailuo MiniMax video-gen reference at `https://www.minimaxi.com/en/document/api/video` documenting `/v1/video_generation` and `/v1/query/video_generation` polling with `status: "Queueing"|"Processing"|"Success"|"Fail"` discriminator and i2v-01 / t2v-01 model catalog, the Hunyuan Video reference at `https://hunyuan.tencent.com` documenting Tencent's text-to-video offering, the OpenTelemetry GenAI semconv `gen_ai.request.model` (same attribute as chat-completion, but now indexing video-generation models — required for span attribution) and `gen_ai.usage.input_tokens` / `gen_ai.usage.output_tokens` (for video-input-token compound pricing on multimodal models like Sora-2-pro) and `gen_ai.video.generations.count` and `gen_ai.video.duration_seconds` and `gen_ai.video.resolution` and `gen_ai.video.fps` and `gen_ai.video.codec` and `gen_ai.video.task_status` documented attributes (video-gen observability is a documented attribute set with the 
largest attribute-set yet because video has temporal-resolution-fps dimensions that image does not have), OpenAPI 3.1 spec for `/v1/videos/generations` at `https://github.com/openai/openai-openapi` as canonical machine-readable schema, IANA media-type registry for `video/mp4` / `video/webm` / `video/quicktime` (the canonical content-types for video-generation responses, RFC 6381 for codec parameters within media-types), the Hugging Face Diffusers reference at `https://huggingface.co/docs/diffusers/en/api/pipelines/animatediff` documenting the canonical Python interface for local video-generation with AnimateDiff / Stable Video Diffusion / Mochi-1 / CogVideoX / HunyuanVideo / LTXVideo / WAN2.1 pipeline implementations, the FFmpeg + libavformat reference at `https://ffmpeg.org/ffmpeg-formats.html` documenting the canonical video-codec-and-container conversions that any video-gen client needs for cross-format compatibility (mp4-to-webm, h264-to-h265, h265-to-av1, etc.), the simonw/llm `--video` flag at `https://github.com/simonw/llm` documenting first-class CLI video-input + video-output with provider-aware routing via plugins (`llm-sora`, `llm-veo`, `llm-runway`), the LangChain video-gen integrations at `https://python.langchain.com/docs/integrations/tools/runway/` documenting first-class Python + TypeScript parity with 8+ video-gen-provider integrations (RunwayAPIWrapper / SoraAPIWrapper / VeoAPIWrapper / LumaAPIWrapper / PikaAPIWrapper / KlingAPIWrapper / HailuoAPIWrapper / HunyuanAPIWrapper), the Vercel AI SDK 6 `experimental_generateVideo()` at `https://sdk.vercel.ai/docs/reference/ai-sdk-core/experimental-generate-video` documenting first-class typed surface with provider-aware routing (`@ai-sdk/openai-sora` / `@ai-sdk/google-veo` / `@ai-sdk/runway` / `@ai-sdk/luma` / `@ai-sdk/replicate` / `@ai-sdk/fal` providers), the LiteLLM video-gen reference at `https://docs.litellm.ai/docs/video_generation` documenting proxy-level video-gen covering 12+ providers via 
OpenAI-compat-shim layer, the portkey.ai video-gen gateway documenting gateway-level video-gen with provider-fallback. **claw-code is the sole client/agent/CLI in the surveyed coding-agent ecosystem with zero `/v1/videos/{generations,edits,extends}` integration AND zero Sora-2/Veo-3/Runway-Gen-4/Luma/Pika/Kling/Hailuo/Hunyuan/Mochi-1/CogVideoX/Stability-Video/BFL-Video partner-routing AND zero `/sora` / `/veo` / `/video` / `/render-video` / `/generate-video` slash command AND zero `claw video` / `claw videos` / `claw generate-video` / `claw render-video` CLI subcommand AND zero OutputContentBlock::Video variant AND zero multipart-form-data transport plumbing for video-edit binary uploads AND zero async-task-polling-primitive at the runtime layer** — all seven gaps are unique to claw-code in the surveyed ecosystem (every other coding-agent peer with video-generation support has at least the OpenAI Sora-2 or Runway Gen-4 integration, every other peer with multimodal output has at least the OutputContentBlock::Video variant for inline-video-in-conversation decoding, every other peer with long-running generation workflows has at least a TaskPoller / AsyncTask primitive at the runtime layer), the video-generation-API gap is the **upstream prerequisite** of every visual-temporal-output coding-agent affordance in the runtime, and the nine-layer-fusion-shape-with-async-task-polling-primitive is novel within the cluster — #227 closes the upstream prerequisite of every visual-temporal-output coding-agent affordance and is the first cluster member where the async-task-polling-primitive shape-axis is introduced (distinct from #225's full-duplex symmetric-input-output axis where both InputContentBlock::Audio AND OutputContentBlock::Audio variants are needed simultaneously, distinct from #226's asymmetric-output-only image axis where only OutputContentBlock::Image is needed but with synchronous-response model, distinct from #220's input-only image axis where only 
InputContentBlock::Image is needed for chat-completion vision-input) — a structural prerequisite that every future endpoint family with provider-asymmetric coverage AND multipart-transport-needs-on-edit-endpoints AND asymmetric-output-only modality coverage AND long-running-async-task workflows will inherit, including the next natural follow-on **#228 candidate 3D-asset-generation API typed taxonomy** (`/v1/3d/generations` for OpenAI Shap-E / Meshy AI / Tripo AI / CSM / Stable Point-Aware-3D — same nine-layer fusion-shape-with-async-task-polling-primitive but with 3D-mesh-instead-of-video modality, GLB/GLTF/USDZ-binary-output instead of MP4-binary-output, and per-3d-asset pricing-tier compound-cost model rather than per-second-of-video — the natural extension of #227's shape-axes to a sibling output-only modality with mesh-topology-and-texture-and-material-and-skeletal-rigging dimensions instead of temporal-duration dimensions). + +**Repro tests** (compile-time observable, no network): + +```rust +// Test 1: No VideoGenerationRequest type exists. +#[test] +fn video_generation_request_type_does_not_exist() { + // Compile-time observable: rust/crates/api/src/types.rs has 13 typed entries + // and zero VideoGenerationRequest, VideoEditRequest, VideoExtendRequest, + // VideoGenerationResponse, VideoObject, VideoQuality, VideoResolution, + // VideoAspectRatio, VideoDuration, VideoOutputFormat, VideoFrameRate, + // VideoCodec, VideoStyle, VideoSource, VideoMediaType, VideoTaskStatus, + // VideoTaskId typed model. The code below would not compile. + // let _ = VideoGenerationRequest { + // model: "sora-2".into(), + // prompt: "a sunset over mountains".into(), + // duration_seconds: Some(10), + // resolution: Some(VideoResolution::Hd1080), + // fps: Some(30), + // aspect_ratio: Some(VideoAspectRatio::Widescreen), + // output_format: Some(VideoOutputFormat::Mp4), + // }; +} + +// Test 2: No async-task-polling-primitive at runtime layer. 
+#[test] +fn no_task_poller_primitive_in_runtime() { + // Compile-time observable: rust/crates/runtime/src/ has zero TaskPoller, + // AsyncTask, TaskStatus, TaskId, poll_task_until_complete machinery. + // The code below would not compile. + // let task = TaskPoller::new(provider).submit(request).await?; + // let response = task.poll_until_complete(Duration::from_secs(300)).await?; +} + +// Test 3: No OutputContentBlock::Video variant. +#[test] +fn output_content_block_has_no_video_variant() { + use api::types::OutputContentBlock; + fn ensure_exhaustive(block: &OutputContentBlock) -> &'static str { + match block { + OutputContentBlock::Text { .. } => "text", + OutputContentBlock::ToolUse { .. } => "tool_use", + OutputContentBlock::Thinking { .. } => "thinking", + OutputContentBlock::RedactedThinking { .. } => "redacted_thinking", + // No Video variant — the four arms above are exhaustive at filing. + // OutputContentBlock::Video { .. } => "video", // does not compile + } + } + let _ = ensure_exhaustive; +} + +// Test 4: No video slash command in SlashCommandSpec. +#[test] +fn no_video_slash_command_in_spec_table() { + let names = commands::all_slash_command_specs() + .iter() + .map(|s| s.name) + .collect::<Vec<_>>(); + assert!(!names.contains(&"sora")); + assert!(!names.contains(&"veo")); + assert!(!names.contains(&"video")); + assert!(!names.contains(&"render-video")); + assert!(!names.contains(&"generate-video")); + assert!(!names.contains(&"runway")); + assert!(!names.contains(&"luma")); +} + +// Test 5: pricing_for_model returns None for video-gen models. 
+#[test] +fn pricing_for_model_returns_none_for_video_generation() { + use runtime::pricing_for_model; + assert!(pricing_for_model("sora-2").is_none()); + assert!(pricing_for_model("sora-2-pro").is_none()); + assert!(pricing_for_model("veo-3").is_none()); + assert!(pricing_for_model("veo-3-fast").is_none()); + assert!(pricing_for_model("runway-gen-4").is_none()); + assert!(pricing_for_model("luma-dream-machine").is_none()); + assert!(pricing_for_model("pika-2.0").is_none()); + assert!(pricing_for_model("kling-1.5").is_none()); + assert!(pricing_for_model("hailuo-i2v-01").is_none()); + assert!(pricing_for_model("hunyuan-video").is_none()); + assert!(pricing_for_model("mochi-1").is_none()); + assert!(pricing_for_model("cogvideox-5b").is_none()); + // ModelPricing has only four text-token-only fields. + // Zero video_per_second_cost_usd, zero video_per_minute_cost_usd, + // zero video_input_token_cost_per_million, zero video_output_token_cost_per_million. + // The five-dimensional pricing matrix (per-model × per-resolution × per-fps × + // per-duration × per-extension-vs-generation) is the largest pricing-tier + // extension yet catalogued, exceeding #226's four-dimensional image matrix. +} +``` + +**Status:** Open. No code changed. Filed 2026-04-26 04:08 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: 897055a (post-#226). Sibling-shape cluster: 26 pinpoints. Wire-format-parity cluster: 17 members. Capability-parity cluster: 9 members. Multimodal-IO cluster: 5 members (#220 image-input + #224 embedding-output + #225 audio-bidirectional + #226 image-output + #227 video-output). Cross-cutting-data-pipeline cluster: 4 members. Multipart-transport cluster: 4 members. Provider-asymmetric-delegation cluster: 4 members (the largest partner-set yet at twelve-plus partners for #227). 
**Nine-layer-fusion-shape-with-async-task-polling-primitive** matches #225's nine-layer-count but with the novel async-task-polling-primitive axis replacing the symmetric-input-output content-block axis — the largest fusion-shape gap catalogued so far, the upstream prerequisite of every visual-temporal-output coding-agent affordance, and the first cluster member where async-task-polling-primitive becomes a structural prerequisite of the dispatch layer. Distinct from prior cluster members; novel and applies to follow-on candidate 3D-asset-generation API typed taxonomy (#228 candidate inheriting same nine-axis shape with mesh-modality and per-asset-pricing). + +🪨