Stop repo lanes from executing the wrong task payload

The next repo-local sweep target was ROADMAP #71: a claw-code lane accepted an unrelated KakaoTalk/image-analysis prompt even though the lane itself was supposed to be repo-scoped work. This extends the existing prompt-misdelivery guardrail with an optional structured task receipt so worker boot can reject visible wrong-task context before the lane continues executing.

Constraint: Keep the fix inside the existing worker_boot / WorkerSendPrompt control surface instead of inventing a new external OMX-only protocol
Rejected: Treat wrong-task receipts as generic shell misdelivery | loses the expected-vs-observed task context needed to debug contaminated lanes
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: If task-receipt fields change later, update the WorkerSendPrompt schema, worker payload serialization, and wrong-task regression together
Tested: cargo fmt --all --check; cargo clippy --workspace --all-targets -- -D warnings; cargo test --workspace; architect review APPROVE
Not-tested: External orchestrators that have not yet started populating the optional task_receipt field
2026-05-30 03:35:20 +08:00 · 2026-04-12 07:00:07 +00:00 · 2026-04-12 07:00:07 +00:00 · f309ff8642
commit f309ff8642
parent 3b806702e7
4 changed files with 195 additions and 12 deletions
--- a/ROADMAP.md
+++ b/ROADMAP.md
@ -513,3 +513,5 @@ Model name prefix now wins unconditionally over env-var presence. Regression tes
 69. **Lane stop summaries have no minimum quality floor** — **done (verified 2026-04-12):** completed lane persistence in `rust/crates/tools/src/lib.rs` now normalizes vague/control-only stop summaries into a contextual fallback that includes the lane target and status, while preserving structured metadata about whether the quality floor fired (`qualityFloorApplied`, `rawSummary`, `reasons`, `wordCount`). Regression coverage locks both the pass-through path for good summaries and the fallback path for mushy summaries like `commit push everyting, keep sweeping $ralph`. **Original filing below.**

 70. **Install-source ambiguity misleads real users** — **done (verified 2026-04-12):** repo-local Rust guidance now makes the source of truth explicit in `claw doctor` and `claw --help`, naming `ultraworkers/claw-code` as the canonical repo and warning that `cargo install claw-code` installs a deprecated stub rather than the `claw` binary. Regression coverage locks both the new doctor JSON check and the help-text warning. **Original filing below.**
+
+71. **Wrong-task prompt receipt is not detected before execution** — **done (verified 2026-04-12):** worker boot prompt dispatch now accepts an optional structured `task_receipt` (`repo`, `task_kind`, `source_surface`, `expected_artifacts`, `objective_preview`) and treats mismatched visible prompt context as a `WrongTask` prompt-delivery failure before execution continues. The prompt-delivery payload now records `observed_prompt_preview` plus the expected receipt, and regression coverage locks both the existing shell/wrong-target paths and the new KakaoTalk-style wrong-task mismatch case. **Original filing below.**
--- a/rust/crates/runtime/src/worker_boot.rs
+++ b/rust/crates/runtime/src/worker_boot.rs
@ -92,6 +92,7 @@ pub enum WorkerTrustResolution {
 pub enum WorkerPromptTarget {
    Shell,
    WrongTarget,
+    WrongTask,
    Unknown,
 }

@ -108,10 +109,24 @@ pub enum WorkerEventPayload {
        observed_target: WorkerPromptTarget,
        #[serde(skip_serializing_if = "Option::is_none")]
        observed_cwd: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        observed_prompt_preview: Option<String>,
+        #[serde(skip_serializing_if = "Option::is_none")]
+        task_receipt: Option<WorkerTaskReceipt>,
        recovery_armed: bool,
    },
 }

+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
+pub struct WorkerTaskReceipt {
+    pub repo: String,
+    pub task_kind: String,
+    pub source_surface: String,
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub expected_artifacts: Vec<String>,
+    pub objective_preview: String,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
 pub struct WorkerEvent {
    pub seq: u64,
@ -134,6 +149,7 @@ pub struct Worker {
    pub prompt_delivery_attempts: u32,
    pub prompt_in_flight: bool,
    pub last_prompt: Option<String>,
+    pub expected_receipt: Option<WorkerTaskReceipt>,
    pub replay_prompt: Option<String>,
    pub last_error: Option<WorkerFailure>,
    pub created_at: u64,
@ -182,6 +198,7 @@ impl WorkerRegistry {
            prompt_delivery_attempts: 0,
            prompt_in_flight: false,
            last_prompt: None,
+            expected_receipt: None,
            replay_prompt: None,
            last_error: None,
            created_at: ts,
@ -257,6 +274,7 @@ impl WorkerRegistry {
                    &lowered,
                    worker.last_prompt.as_deref(),
                    &worker.cwd,
+                    worker.expected_receipt.as_ref(),
                )
            })
            .flatten()
@ -272,6 +290,10 @@ impl WorkerRegistry {
                    "worker prompt landed in the wrong target instead of {}: {}",
                    worker.cwd, prompt_preview
                ),
+                WorkerPromptTarget::WrongTask => format!(
+                    "worker prompt receipt mismatched the expected task context for {}: {}",
+                    worker.cwd, prompt_preview
+                ),
                WorkerPromptTarget::Unknown => format!(
                    "worker prompt delivery failed before reaching coding agent: {prompt_preview}"
                ),
@ -291,6 +313,8 @@ impl WorkerRegistry {
                    prompt_preview: prompt_preview.clone(),
                    observed_target: observation.target,
                    observed_cwd: observation.observed_cwd.clone(),
+                    observed_prompt_preview: observation.observed_prompt_preview.clone(),
+                    task_receipt: worker.expected_receipt.clone(),
                    recovery_armed: false,
                }),
            );
@ -306,6 +330,8 @@ impl WorkerRegistry {
                        prompt_preview,
                        observed_target: observation.target,
                        observed_cwd: observation.observed_cwd,
+                        observed_prompt_preview: observation.observed_prompt_preview,
+                        task_receipt: worker.expected_receipt.clone(),
                        recovery_armed: true,
                    }),
                );
@ -374,7 +400,12 @@ impl WorkerRegistry {
        Ok(worker.clone())
    }

-    pub fn send_prompt(&self, worker_id: &str, prompt: Option<&str>) -> Result<Worker, String> {
+    pub fn send_prompt(
+        &self,
+        worker_id: &str,
+        prompt: Option<&str>,
+        task_receipt: Option<WorkerTaskReceipt>,
+    ) -> Result<Worker, String> {
        let mut inner = self.inner.lock().expect("worker registry lock poisoned");
        let worker = inner
            .workers
@ -398,6 +429,7 @@ impl WorkerRegistry {
        worker.prompt_delivery_attempts += 1;
        worker.prompt_in_flight = true;
        worker.last_prompt = Some(next_prompt.clone());
+        worker.expected_receipt = task_receipt;
        worker.replay_prompt = None;
        worker.last_error = None;
        worker.status = WorkerStatus::Running;
@ -548,6 +580,7 @@ fn prompt_misdelivery_is_relevant(worker: &Worker) -> bool {
 struct PromptDeliveryObservation {
    target: WorkerPromptTarget,
    observed_cwd: Option<String>,
+    observed_prompt_preview: Option<String>,
 }

 fn push_event(
@ -699,6 +732,7 @@ fn detect_prompt_misdelivery(
    lowered: &str,
    prompt: Option<&str>,
    expected_cwd: &str,
+    expected_receipt: Option<&WorkerTaskReceipt>,
 ) -> Option<PromptDeliveryObservation> {
    let Some(prompt) = prompt else {
        return None;
@ -713,12 +747,30 @@ fn detect_prompt_misdelivery(
        return None;
    }
    let prompt_visible = lowered.contains(&prompt_snippet);
+    let observed_prompt_preview = detect_prompt_echo(screen_text);
+
+    if let Some(receipt) = expected_receipt {
+        let receipt_visible = task_receipt_visible(lowered, receipt);
+        let mismatched_prompt_visible = observed_prompt_preview
+            .as_deref()
+            .map(str::to_ascii_lowercase)
+            .is_some_and(|preview| !preview.contains(&prompt_snippet));
+
+        if (prompt_visible || mismatched_prompt_visible) && !receipt_visible {
+            return Some(PromptDeliveryObservation {
+                target: WorkerPromptTarget::WrongTask,
+                observed_cwd: detect_observed_shell_cwd(screen_text),
+                observed_prompt_preview,
+            });
+        }
+    }

    if let Some(observed_cwd) = detect_observed_shell_cwd(screen_text) {
        if prompt_visible && !cwd_matches_observed_target(expected_cwd, &observed_cwd) {
            return Some(PromptDeliveryObservation {
                target: WorkerPromptTarget::WrongTarget,
                observed_cwd: Some(observed_cwd),
+                observed_prompt_preview,
            });
        }
    }
@ -736,6 +788,7 @@ fn detect_prompt_misdelivery(
    (shell_error && prompt_visible).then_some(PromptDeliveryObservation {
        target: WorkerPromptTarget::Shell,
        observed_cwd: None,
+        observed_prompt_preview,
    })
 }

@ -748,10 +801,38 @@ fn prompt_preview(prompt: &str) -> String {
    format!("{}…", preview.trim_end())
 }

+fn detect_prompt_echo(screen_text: &str) -> Option<String> {
+    screen_text.lines().find_map(|line| {
+        line.trim_start()
+            .strip_prefix('›')
+            .map(str::trim)
+            .filter(|value| !value.is_empty())
+            .map(str::to_string)
+    })
+}
+
+fn task_receipt_visible(lowered_screen_text: &str, receipt: &WorkerTaskReceipt) -> bool {
+    let expected_tokens = [
+        receipt.repo.to_ascii_lowercase(),
+        receipt.task_kind.to_ascii_lowercase(),
+        receipt.source_surface.to_ascii_lowercase(),
+        receipt.objective_preview.to_ascii_lowercase(),
+    ];
+
+    expected_tokens
+        .iter()
+        .all(|token| lowered_screen_text.contains(token))
+        && receipt
+            .expected_artifacts
+            .iter()
+            .all(|artifact| lowered_screen_text.contains(&artifact.to_ascii_lowercase()))
+}
+
 fn prompt_misdelivery_detail(observation: &PromptDeliveryObservation) -> &'static str {
    match observation.target {
        WorkerPromptTarget::Shell => "shell misdelivery detected",
        WorkerPromptTarget::WrongTarget => "prompt landed in wrong target",
+        WorkerPromptTarget::WrongTask => "prompt receipt mismatched expected task context",
        WorkerPromptTarget::Unknown => "prompt delivery failure detected",
    }
 }
@ -865,7 +946,7 @@ mod tests {
            WorkerFailureKind::TrustGate
        );

-        let send_before_resolve = registry.send_prompt(&worker.worker_id, Some("ship it"));
+        let send_before_resolve = registry.send_prompt(&worker.worker_id, Some("ship it"), None);
        assert!(send_before_resolve
            .expect_err("prompt delivery should be gated")
            .contains("not ready for prompt delivery"));
@ -905,7 +986,7 @@ mod tests {
            .expect("ready observe should succeed");

        let running = registry
-            .send_prompt(&worker.worker_id, Some("Implement worker handshake"))
+            .send_prompt(&worker.worker_id, Some("Implement worker handshake"), None)
            .expect("prompt send should succeed");
        assert_eq!(running.status, WorkerStatus::Running);
        assert_eq!(running.prompt_delivery_attempts, 1);
@ -941,6 +1022,8 @@ mod tests {
                prompt_preview: "Implement worker handshake".to_string(),
                observed_target: WorkerPromptTarget::Shell,
                observed_cwd: None,
+                observed_prompt_preview: None,
+                task_receipt: None,
                recovery_armed: false,
            })
        );
@ -956,12 +1039,14 @@ mod tests {
                prompt_preview: "Implement worker handshake".to_string(),
                observed_target: WorkerPromptTarget::Shell,
                observed_cwd: None,
+                observed_prompt_preview: None,
+                task_receipt: None,
                recovery_armed: true,
            })
        );

        let replayed = registry
-            .send_prompt(&worker.worker_id, None)
+            .send_prompt(&worker.worker_id, None, None)
            .expect("replay send should succeed");
        assert_eq!(replayed.status, WorkerStatus::Running);
        assert!(replayed.replay_prompt.is_none());
@ -976,7 +1061,11 @@ mod tests {
            .observe(&worker.worker_id, "Ready for input\n>")
            .expect("ready observe should succeed");
        registry
-            .send_prompt(&worker.worker_id, Some("Run the worker bootstrap tests"))
+            .send_prompt(
+                &worker.worker_id,
+                Some("Run the worker bootstrap tests"),
+                None,
+            )
            .expect("prompt send should succeed");

        let recovered = registry
@ -1007,6 +1096,8 @@ mod tests {
                prompt_preview: "Run the worker bootstrap tests".to_string(),
                observed_target: WorkerPromptTarget::WrongTarget,
                observed_cwd: Some("/tmp/repo-target-b".to_string()),
+                observed_prompt_preview: None,
+                task_receipt: None,
                recovery_armed: false,
            })
        );
@ -1049,6 +1140,75 @@ mod tests {
        assert!(ready.last_error.is_none());
    }

+    #[test]
+    fn wrong_task_receipt_mismatch_is_detected_before_execution_continues() {
+        let registry = WorkerRegistry::new();
+        let worker = registry.create("/tmp/repo-task", &[], true);
+        registry
+            .observe(&worker.worker_id, "Ready for input\n>")
+            .expect("ready observe should succeed");
+        registry
+            .send_prompt(
+                &worker.worker_id,
+                Some("Implement worker handshake"),
+                Some(WorkerTaskReceipt {
+                    repo: "claw-code".to_string(),
+                    task_kind: "repo_code".to_string(),
+                    source_surface: "omx_team".to_string(),
+                    expected_artifacts: vec!["patch".to_string(), "tests".to_string()],
+                    objective_preview: "Implement worker handshake".to_string(),
+                }),
+            )
+            .expect("prompt send should succeed");
+
+        let recovered = registry
+            .observe(
+                &worker.worker_id,
+                "› Explain this KakaoTalk screenshot for a friend\nI can help analyze the screenshot…",
+            )
+            .expect("mismatch observe should succeed");
+
+        assert_eq!(recovered.status, WorkerStatus::ReadyForPrompt);
+        assert_eq!(
+            recovered
+                .last_error
+                .expect("mismatch error should exist")
+                .kind,
+            WorkerFailureKind::PromptDelivery
+        );
+        let mismatch = recovered
+            .events
+            .iter()
+            .find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
+            .expect("wrong-task event should exist");
+        assert_eq!(mismatch.status, WorkerStatus::Failed);
+        assert_eq!(
+            mismatch.payload,
+            Some(WorkerEventPayload::PromptDelivery {
+                prompt_preview: "Implement worker handshake".to_string(),
+                observed_target: WorkerPromptTarget::WrongTask,
+                observed_cwd: None,
+                observed_prompt_preview: Some(
+                    "Explain this KakaoTalk screenshot for a friend".to_string()
+                ),
+                task_receipt: Some(WorkerTaskReceipt {
+                    repo: "claw-code".to_string(),
+                    task_kind: "repo_code".to_string(),
+                    source_surface: "omx_team".to_string(),
+                    expected_artifacts: vec!["patch".to_string(), "tests".to_string()],
+                    objective_preview: "Implement worker handshake".to_string(),
+                }),
+                recovery_armed: false,
+            })
+        );
+        let replay = recovered
+            .events
+            .iter()
+            .find(|event| event.kind == WorkerEventKind::PromptReplayArmed)
+            .expect("replay event should exist");
+        assert_eq!(replay.status, WorkerStatus::ReadyForPrompt);
+    }
+
    #[test]
    fn restart_and_terminate_reset_or_finish_worker() {
        let registry = WorkerRegistry::new();
@ -1057,7 +1217,7 @@ mod tests {
            .observe(&worker.worker_id, "Ready for input\n>")
            .expect("ready observe should succeed");
        registry
-            .send_prompt(&worker.worker_id, Some("Run tests"))
+            .send_prompt(&worker.worker_id, Some("Run tests"), None)
            .expect("prompt send should succeed");

        let restarted = registry
@ -1086,7 +1246,7 @@ mod tests {
            .observe(&worker.worker_id, "Ready for input\n>")
            .expect("ready observe should succeed");
        registry
-            .send_prompt(&worker.worker_id, Some("Run tests"))
+            .send_prompt(&worker.worker_id, Some("Run tests"), None)
            .expect("prompt send should succeed");

        let failed = registry
@ -1163,7 +1323,7 @@ mod tests {
            .observe(&worker.worker_id, "Ready for input\n>")
            .expect("ready observe should succeed");
        registry
-            .send_prompt(&worker.worker_id, Some("Run tests"))
+            .send_prompt(&worker.worker_id, Some("Run tests"), None)
            .expect("prompt send should succeed");

        let finished = registry
--- a/rust/crates/runtime/tests/integration_tests.rs
+++ b/rust/crates/runtime/tests/integration_tests.rs
@ -304,7 +304,7 @@ fn worker_provider_failure_flows_through_recovery_to_policy() {
        .observe(&worker.worker_id, "Ready for your input\n>")
        .expect("ready observe should succeed");
    registry
-        .send_prompt(&worker.worker_id, Some("Run analysis"))
+        .send_prompt(&worker.worker_id, Some("Run analysis"), None)
        .expect("prompt send should succeed");

    // Session completes with provider failure (finish="unknown", tokens=0)
--- a/rust/crates/tools/src/lib.rs
+++ b/rust/crates/tools/src/lib.rs
@ -20,7 +20,7 @@ use runtime::{
    summary_compression::compress_summary_text,
    task_registry::TaskRegistry,
    team_cron_registry::{CronRegistry, TeamRegistry},
-    worker_boot::{WorkerReadySnapshot, WorkerRegistry},
+    worker_boot::{WorkerReadySnapshot, WorkerRegistry, WorkerTaskReceipt},
    write_file, ApiClient, ApiRequest, AssistantEvent, BashCommandInput, BashCommandOutput,
    BranchFreshness, ConfigLoader, ContentBlock, ConversationMessage, ConversationRuntime,
    GrepSearchInput, LaneCommitProvenance, LaneEvent, LaneEventBlocker, LaneEventName,
@ -930,7 +930,22 @@ pub fn mvp_tool_specs() -> Vec<ToolSpec> {
                "type": "object",
                "properties": {
                    "worker_id": { "type": "string" },
-                    "prompt": { "type": "string" }
+                    "prompt": { "type": "string" },
+                    "task_receipt": {
+                        "type": "object",
+                        "properties": {
+                            "repo": { "type": "string" },
+                            "task_kind": { "type": "string" },
+                            "source_surface": { "type": "string" },
+                            "expected_artifacts": {
+                                "type": "array",
+                                "items": { "type": "string" }
+                            },
+                            "objective_preview": { "type": "string" }
+                        },
+                        "required": ["repo", "task_kind", "source_surface", "objective_preview"],
+                        "additionalProperties": false
+                    }
                },
                "required": ["worker_id"],
                "additionalProperties": false
@ -1522,7 +1537,11 @@ fn run_worker_await_ready(input: WorkerIdInput) -> Result<String, String> {

 #[allow(clippy::needless_pass_by_value)]
 fn run_worker_send_prompt(input: WorkerSendPromptInput) -> Result<String, String> {
-    let worker = global_worker_registry().send_prompt(&input.worker_id, input.prompt.as_deref())?;
+    let worker = global_worker_registry().send_prompt(
+        &input.worker_id,
+        input.prompt.as_deref(),
+        input.task_receipt,
+    )?;
    to_pretty_json(worker)
 }

@ -2439,6 +2458,8 @@ struct WorkerSendPromptInput {
    worker_id: String,
    #[serde(default)]
    prompt: Option<String>,
+    #[serde(default)]
+    task_receipt: Option<WorkerTaskReceipt>,
 }

 const fn default_auto_recover_prompt_misdelivery() -> bool {