Stop repo lanes from executing the wrong task payload

The next repo-local sweep target was ROADMAP #71: a claw-code lane
accepted an unrelated KakaoTalk/image-analysis prompt even though the
lane itself was supposed to be repo-scoped work. This extends the
existing prompt-misdelivery guardrail with an optional structured task
receipt so worker boot can reject visible wrong-task context before the
lane continues executing.

Constraint: Keep the fix inside the existing worker_boot / WorkerSendPrompt control surface instead of inventing a new external OMX-only protocol
Rejected: Treat wrong-task receipts as generic shell misdelivery | loses the expected-vs-observed task context needed to debug contaminated lanes
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: If task-receipt fields change later, update the WorkerSendPrompt schema, worker payload serialization, and wrong-task regression together
Tested: cargo fmt --all --check; cargo clippy --workspace --all-targets -- -D warnings; cargo test --workspace; architect review APPROVE
Not-tested: External orchestrators that have not yet started populating the optional task_receipt field
This commit is contained in:
Yeachan-Heo 2026-04-12 07:00:07 +00:00
parent 3b806702e7
commit f309ff8642
4 changed files with 195 additions and 12 deletions

View File

@ -513,3 +513,5 @@ Model name prefix now wins unconditionally over env-var presence. Regression tes
69. **Lane stop summaries have no minimum quality floor** — **done (verified 2026-04-12):** completed lane persistence in `rust/crates/tools/src/lib.rs` now normalizes vague/control-only stop summaries into a contextual fallback that includes the lane target and status, while preserving structured metadata about whether the quality floor fired (`qualityFloorApplied`, `rawSummary`, `reasons`, `wordCount`). Regression coverage locks both the pass-through path for good summaries and the fallback path for mushy summaries like `commit push everyting, keep sweeping $ralph`. **Original filing below.**
70. **Install-source ambiguity misleads real users** — **done (verified 2026-04-12):** repo-local Rust guidance now makes the source of truth explicit in `claw doctor` and `claw --help`, naming `ultraworkers/claw-code` as the canonical repo and warning that `cargo install claw-code` installs a deprecated stub rather than the `claw` binary. Regression coverage locks both the new doctor JSON check and the help-text warning. **Original filing below.**
71. **Wrong-task prompt receipt is not detected before execution** — **done (verified 2026-04-12):** worker boot prompt dispatch now accepts an optional structured `task_receipt` (`repo`, `task_kind`, `source_surface`, `expected_artifacts`, `objective_preview`) and treats mismatched visible prompt context as a `WrongTask` prompt-delivery failure before execution continues. The prompt-delivery payload now records `observed_prompt_preview` plus the expected receipt, and regression coverage locks both the existing shell/wrong-target paths and the new KakaoTalk-style wrong-task mismatch case. **Original filing below.**

View File

@ -92,6 +92,7 @@ pub enum WorkerTrustResolution {
/// Classification of where (or how) a dispatched worker prompt went astray.
pub enum WorkerPromptTarget {
/// The prompt was visible on screen together with a shell error.
Shell,
/// The prompt was visible, but the shell cwd observed on screen did not
/// match the worker's expected cwd.
WrongTarget,
/// An expected task receipt was set and was not visible on screen while a
/// prompt (the sent one, or a different echoed one) was visible.
WrongTask,
/// Delivery failed without a more specific classification; reported as a
/// failure "before reaching coding agent".
Unknown,
}
@ -108,10 +109,24 @@ pub enum WorkerEventPayload {
observed_target: WorkerPromptTarget,
#[serde(skip_serializing_if = "Option::is_none")]
observed_cwd: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
observed_prompt_preview: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
task_receipt: Option<WorkerTaskReceipt>,
recovery_armed: bool,
},
}
/// Structured description of the task a prompt is expected to carry.
///
/// The receipt is stored on the worker when a prompt is sent and is echoed in
/// prompt-delivery event payloads. During misdelivery detection every field
/// (and every expected artifact) is ASCII-lower-cased and must appear as a
/// substring of the lower-cased screen text for the receipt to count as
/// visible.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct WorkerTaskReceipt {
/// Repository identifier, e.g. `claw-code` in the regression test.
pub repo: String,
/// Kind of task, e.g. `repo_code` in the regression test.
/// NOTE(review): the set of valid kinds is not visible here.
pub task_kind: String,
/// Surface that issued the task, e.g. `omx_team` in the regression test.
/// NOTE(review): the set of valid surfaces is not visible here.
pub source_surface: String,
/// Artifacts the task is expected to produce (e.g. `patch`, `tests`).
/// Defaults to empty when absent and is omitted from output when empty.
#[serde(default, skip_serializing_if = "Vec::is_empty")]
pub expected_artifacts: Vec<String>,
/// Short preview of the task objective.
pub objective_preview: String,
}
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
pub struct WorkerEvent {
pub seq: u64,
@ -134,6 +149,7 @@ pub struct Worker {
pub prompt_delivery_attempts: u32,
pub prompt_in_flight: bool,
pub last_prompt: Option<String>,
pub expected_receipt: Option<WorkerTaskReceipt>,
pub replay_prompt: Option<String>,
pub last_error: Option<WorkerFailure>,
pub created_at: u64,
@ -182,6 +198,7 @@ impl WorkerRegistry {
prompt_delivery_attempts: 0,
prompt_in_flight: false,
last_prompt: None,
expected_receipt: None,
replay_prompt: None,
last_error: None,
created_at: ts,
@ -257,6 +274,7 @@ impl WorkerRegistry {
&lowered,
worker.last_prompt.as_deref(),
&worker.cwd,
worker.expected_receipt.as_ref(),
)
})
.flatten()
@ -272,6 +290,10 @@ impl WorkerRegistry {
"worker prompt landed in the wrong target instead of {}: {}",
worker.cwd, prompt_preview
),
WorkerPromptTarget::WrongTask => format!(
"worker prompt receipt mismatched the expected task context for {}: {}",
worker.cwd, prompt_preview
),
WorkerPromptTarget::Unknown => format!(
"worker prompt delivery failed before reaching coding agent: {prompt_preview}"
),
@ -291,6 +313,8 @@ impl WorkerRegistry {
prompt_preview: prompt_preview.clone(),
observed_target: observation.target,
observed_cwd: observation.observed_cwd.clone(),
observed_prompt_preview: observation.observed_prompt_preview.clone(),
task_receipt: worker.expected_receipt.clone(),
recovery_armed: false,
}),
);
@ -306,6 +330,8 @@ impl WorkerRegistry {
prompt_preview,
observed_target: observation.target,
observed_cwd: observation.observed_cwd,
observed_prompt_preview: observation.observed_prompt_preview,
task_receipt: worker.expected_receipt.clone(),
recovery_armed: true,
}),
);
@ -374,7 +400,12 @@ impl WorkerRegistry {
Ok(worker.clone())
}
pub fn send_prompt(&self, worker_id: &str, prompt: Option<&str>) -> Result<Worker, String> {
pub fn send_prompt(
&self,
worker_id: &str,
prompt: Option<&str>,
task_receipt: Option<WorkerTaskReceipt>,
) -> Result<Worker, String> {
let mut inner = self.inner.lock().expect("worker registry lock poisoned");
let worker = inner
.workers
@ -398,6 +429,7 @@ impl WorkerRegistry {
worker.prompt_delivery_attempts += 1;
worker.prompt_in_flight = true;
worker.last_prompt = Some(next_prompt.clone());
worker.expected_receipt = task_receipt;
worker.replay_prompt = None;
worker.last_error = None;
worker.status = WorkerStatus::Running;
@ -548,6 +580,7 @@ fn prompt_misdelivery_is_relevant(worker: &Worker) -> bool {
/// Result of inspecting worker screen text for a misdelivered prompt.
struct PromptDeliveryObservation {
/// How the misdelivery was classified.
target: WorkerPromptTarget,
/// Shell working directory detected on screen, when one was found.
observed_cwd: Option<String>,
/// Prompt text echoed on screen, as extracted by `detect_prompt_echo`.
observed_prompt_preview: Option<String>,
}
fn push_event(
@ -699,6 +732,7 @@ fn detect_prompt_misdelivery(
lowered: &str,
prompt: Option<&str>,
expected_cwd: &str,
expected_receipt: Option<&WorkerTaskReceipt>,
) -> Option<PromptDeliveryObservation> {
let Some(prompt) = prompt else {
return None;
@ -713,12 +747,30 @@ fn detect_prompt_misdelivery(
return None;
}
let prompt_visible = lowered.contains(&prompt_snippet);
let observed_prompt_preview = detect_prompt_echo(screen_text);
if let Some(receipt) = expected_receipt {
let receipt_visible = task_receipt_visible(lowered, receipt);
let mismatched_prompt_visible = observed_prompt_preview
.as_deref()
.map(str::to_ascii_lowercase)
.is_some_and(|preview| !preview.contains(&prompt_snippet));
if (prompt_visible || mismatched_prompt_visible) && !receipt_visible {
return Some(PromptDeliveryObservation {
target: WorkerPromptTarget::WrongTask,
observed_cwd: detect_observed_shell_cwd(screen_text),
observed_prompt_preview,
});
}
}
if let Some(observed_cwd) = detect_observed_shell_cwd(screen_text) {
if prompt_visible && !cwd_matches_observed_target(expected_cwd, &observed_cwd) {
return Some(PromptDeliveryObservation {
target: WorkerPromptTarget::WrongTarget,
observed_cwd: Some(observed_cwd),
observed_prompt_preview,
});
}
}
@ -736,6 +788,7 @@ fn detect_prompt_misdelivery(
(shell_error && prompt_visible).then_some(PromptDeliveryObservation {
target: WorkerPromptTarget::Shell,
observed_cwd: None,
observed_prompt_preview,
})
}
@ -748,10 +801,38 @@ fn prompt_preview(prompt: &str) -> String {
format!("{}", preview.trim_end())
}
/// Returns the first echoed prompt visible in `screen_text`, if any.
///
/// A line counts as a prompt echo when, after leading whitespace is skipped,
/// it starts with the prompt marker U+203A (single right-pointing angle
/// quotation mark). The text following the marker is trimmed and returned.
/// Lines that contain only the marker are ignored and scanning continues with
/// the next line. Returns `None` when no line carries a non-empty echo.
fn detect_prompt_echo(screen_text: &str) -> Option<String> {
    // Glyph that introduces an echoed prompt line. It is written as an escape
    // so the character survives tooling that mangles non-ASCII source text.
    const PROMPT_ECHO_MARKER: char = '\u{203A}';

    screen_text.lines().find_map(|line| {
        line.trim_start()
            .strip_prefix(PROMPT_ECHO_MARKER)
            .map(str::trim)
            .filter(|echo| !echo.is_empty())
            .map(str::to_string)
    })
}
/// Reports whether every element of the expected task receipt shows up in the
/// screen text.
///
/// `lowered_screen_text` is expected to be lower-cased by the caller; each
/// receipt value is ASCII-lower-cased here before the substring check. The
/// receipt is visible only when the repo, task kind, source surface, objective
/// preview and every expected artifact are all present.
fn task_receipt_visible(lowered_screen_text: &str, receipt: &WorkerTaskReceipt) -> bool {
    let on_screen = |needle: &str| lowered_screen_text.contains(&needle.to_ascii_lowercase());

    let scalar_fields = [
        receipt.repo.as_str(),
        receipt.task_kind.as_str(),
        receipt.source_surface.as_str(),
        receipt.objective_preview.as_str(),
    ];
    if !scalar_fields.iter().all(|&field| on_screen(field)) {
        return false;
    }

    receipt
        .expected_artifacts
        .iter()
        .all(|artifact| on_screen(artifact.as_str()))
}
fn prompt_misdelivery_detail(observation: &PromptDeliveryObservation) -> &'static str {
match observation.target {
WorkerPromptTarget::Shell => "shell misdelivery detected",
WorkerPromptTarget::WrongTarget => "prompt landed in wrong target",
WorkerPromptTarget::WrongTask => "prompt receipt mismatched expected task context",
WorkerPromptTarget::Unknown => "prompt delivery failure detected",
}
}
@ -865,7 +946,7 @@ mod tests {
WorkerFailureKind::TrustGate
);
let send_before_resolve = registry.send_prompt(&worker.worker_id, Some("ship it"));
let send_before_resolve = registry.send_prompt(&worker.worker_id, Some("ship it"), None);
assert!(send_before_resolve
.expect_err("prompt delivery should be gated")
.contains("not ready for prompt delivery"));
@ -905,7 +986,7 @@ mod tests {
.expect("ready observe should succeed");
let running = registry
.send_prompt(&worker.worker_id, Some("Implement worker handshake"))
.send_prompt(&worker.worker_id, Some("Implement worker handshake"), None)
.expect("prompt send should succeed");
assert_eq!(running.status, WorkerStatus::Running);
assert_eq!(running.prompt_delivery_attempts, 1);
@ -941,6 +1022,8 @@ mod tests {
prompt_preview: "Implement worker handshake".to_string(),
observed_target: WorkerPromptTarget::Shell,
observed_cwd: None,
observed_prompt_preview: None,
task_receipt: None,
recovery_armed: false,
})
);
@ -956,12 +1039,14 @@ mod tests {
prompt_preview: "Implement worker handshake".to_string(),
observed_target: WorkerPromptTarget::Shell,
observed_cwd: None,
observed_prompt_preview: None,
task_receipt: None,
recovery_armed: true,
})
);
let replayed = registry
.send_prompt(&worker.worker_id, None)
.send_prompt(&worker.worker_id, None, None)
.expect("replay send should succeed");
assert_eq!(replayed.status, WorkerStatus::Running);
assert!(replayed.replay_prompt.is_none());
@ -976,7 +1061,11 @@ mod tests {
.observe(&worker.worker_id, "Ready for input\n>")
.expect("ready observe should succeed");
registry
.send_prompt(&worker.worker_id, Some("Run the worker bootstrap tests"))
.send_prompt(
&worker.worker_id,
Some("Run the worker bootstrap tests"),
None,
)
.expect("prompt send should succeed");
let recovered = registry
@ -1007,6 +1096,8 @@ mod tests {
prompt_preview: "Run the worker bootstrap tests".to_string(),
observed_target: WorkerPromptTarget::WrongTarget,
observed_cwd: Some("/tmp/repo-target-b".to_string()),
observed_prompt_preview: None,
task_receipt: None,
recovery_armed: false,
})
);
@ -1049,6 +1140,75 @@ mod tests {
assert!(ready.last_error.is_none());
}
/// Regression test for wrong-task detection.
///
/// A prompt is sent together with an expected task receipt, then the worker
/// screen shows an unrelated echoed prompt (introduced by the U+203A prompt
/// marker) and none of the receipt fields. The observation must be recorded as
/// a `WrongTask` prompt-delivery failure carrying both the observed prompt
/// preview and the expected receipt, after which replay is armed and the
/// worker returns to `ReadyForPrompt`.
#[test]
fn wrong_task_receipt_mismatch_is_detected_before_execution_continues() {
    let registry = WorkerRegistry::new();
    let worker = registry.create("/tmp/repo-task", &[], true);
    registry
        .observe(&worker.worker_id, "Ready for input\n>")
        .expect("ready observe should succeed");

    // Bound once so the sent receipt and the asserted receipt cannot drift apart.
    let expected_receipt = WorkerTaskReceipt {
        repo: "claw-code".to_string(),
        task_kind: "repo_code".to_string(),
        source_surface: "omx_team".to_string(),
        expected_artifacts: vec!["patch".to_string(), "tests".to_string()],
        objective_preview: "Implement worker handshake".to_string(),
    };
    registry
        .send_prompt(
            &worker.worker_id,
            Some("Implement worker handshake"),
            Some(expected_receipt.clone()),
        )
        .expect("prompt send should succeed");

    // The first screen line must begin with the prompt marker; without it no
    // echoed prompt is found and the expected preview below cannot be produced.
    let recovered = registry
        .observe(
            &worker.worker_id,
            "\u{203A} Explain this KakaoTalk screenshot for a friend\nI can help analyze the screenshot…",
        )
        .expect("mismatch observe should succeed");

    assert_eq!(recovered.status, WorkerStatus::ReadyForPrompt);
    assert_eq!(
        recovered
            .last_error
            .expect("mismatch error should exist")
            .kind,
        WorkerFailureKind::PromptDelivery
    );

    let mismatch = recovered
        .events
        .iter()
        .find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
        .expect("wrong-task event should exist");
    assert_eq!(mismatch.status, WorkerStatus::Failed);
    assert_eq!(
        mismatch.payload,
        Some(WorkerEventPayload::PromptDelivery {
            prompt_preview: "Implement worker handshake".to_string(),
            observed_target: WorkerPromptTarget::WrongTask,
            observed_cwd: None,
            observed_prompt_preview: Some(
                "Explain this KakaoTalk screenshot for a friend".to_string()
            ),
            task_receipt: Some(expected_receipt),
            recovery_armed: false,
        })
    );

    let replay = recovered
        .events
        .iter()
        .find(|event| event.kind == WorkerEventKind::PromptReplayArmed)
        .expect("replay event should exist");
    assert_eq!(replay.status, WorkerStatus::ReadyForPrompt);
}
#[test]
fn restart_and_terminate_reset_or_finish_worker() {
let registry = WorkerRegistry::new();
@ -1057,7 +1217,7 @@ mod tests {
.observe(&worker.worker_id, "Ready for input\n>")
.expect("ready observe should succeed");
registry
.send_prompt(&worker.worker_id, Some("Run tests"))
.send_prompt(&worker.worker_id, Some("Run tests"), None)
.expect("prompt send should succeed");
let restarted = registry
@ -1086,7 +1246,7 @@ mod tests {
.observe(&worker.worker_id, "Ready for input\n>")
.expect("ready observe should succeed");
registry
.send_prompt(&worker.worker_id, Some("Run tests"))
.send_prompt(&worker.worker_id, Some("Run tests"), None)
.expect("prompt send should succeed");
let failed = registry
@ -1163,7 +1323,7 @@ mod tests {
.observe(&worker.worker_id, "Ready for input\n>")
.expect("ready observe should succeed");
registry
.send_prompt(&worker.worker_id, Some("Run tests"))
.send_prompt(&worker.worker_id, Some("Run tests"), None)
.expect("prompt send should succeed");
let finished = registry

View File

@ -304,7 +304,7 @@ fn worker_provider_failure_flows_through_recovery_to_policy() {
.observe(&worker.worker_id, "Ready for your input\n>")
.expect("ready observe should succeed");
registry
.send_prompt(&worker.worker_id, Some("Run analysis"))
.send_prompt(&worker.worker_id, Some("Run analysis"), None)
.expect("prompt send should succeed");
// Session completes with provider failure (finish="unknown", tokens=0)

View File

@ -20,7 +20,7 @@ use runtime::{
summary_compression::compress_summary_text,
task_registry::TaskRegistry,
team_cron_registry::{CronRegistry, TeamRegistry},
worker_boot::{WorkerReadySnapshot, WorkerRegistry},
worker_boot::{WorkerReadySnapshot, WorkerRegistry, WorkerTaskReceipt},
write_file, ApiClient, ApiRequest, AssistantEvent, BashCommandInput, BashCommandOutput,
BranchFreshness, ConfigLoader, ContentBlock, ConversationMessage, ConversationRuntime,
GrepSearchInput, LaneCommitProvenance, LaneEvent, LaneEventBlocker, LaneEventName,
@ -930,7 +930,22 @@ pub fn mvp_tool_specs() -> Vec<ToolSpec> {
"type": "object",
"properties": {
"worker_id": { "type": "string" },
"prompt": { "type": "string" }
"prompt": { "type": "string" },
"task_receipt": {
"type": "object",
"properties": {
"repo": { "type": "string" },
"task_kind": { "type": "string" },
"source_surface": { "type": "string" },
"expected_artifacts": {
"type": "array",
"items": { "type": "string" }
},
"objective_preview": { "type": "string" }
},
"required": ["repo", "task_kind", "source_surface", "objective_preview"],
"additionalProperties": false
}
},
"required": ["worker_id"],
"additionalProperties": false
@ -1522,7 +1537,11 @@ fn run_worker_await_ready(input: WorkerIdInput) -> Result<String, String> {
/// Handles a `WorkerSendPrompt` tool call: forwards the worker id, the
/// optional prompt and the optional task receipt to the global worker
/// registry, then renders the returned worker as pretty-printed JSON.
/// Registry errors are propagated to the caller unchanged.
#[allow(clippy::needless_pass_by_value)]
fn run_worker_send_prompt(input: WorkerSendPromptInput) -> Result<String, String> {
let worker = global_worker_registry().send_prompt(&input.worker_id, input.prompt.as_deref())?;
let worker = global_worker_registry().send_prompt(
&input.worker_id,
input.prompt.as_deref(),
input.task_receipt,
)?;
to_pretty_json(worker)
}
@ -2439,6 +2458,8 @@ struct WorkerSendPromptInput {
worker_id: String,
#[serde(default)]
prompt: Option<String>,
#[serde(default)]
task_receipt: Option<WorkerTaskReceipt>,
}
const fn default_auto_recover_prompt_misdelivery() -> bool {