mirror of
https://github.com/ultraworkers/claw-code.git
synced 2026-04-13 04:05:52 +08:00
Stop repo lanes from executing the wrong task payload
The next repo-local sweep target was ROADMAP #71: a claw-code lane accepted an unrelated KakaoTalk/image-analysis prompt even though the lane itself was supposed to be repo-scoped work. This extends the existing prompt-misdelivery guardrail with an optional structured task receipt so worker boot can reject visible wrong-task context before the lane continues executing.

Constraint: Keep the fix inside the existing worker_boot / WorkerSendPrompt control surface instead of inventing a new external OMX-only protocol
Rejected: Treat wrong-task receipts as generic shell misdelivery | loses the expected-vs-observed task context needed to debug contaminated lanes
Confidence: high
Scope-risk: narrow
Reversibility: clean
Directive: If task-receipt fields change later, update the WorkerSendPrompt schema, worker payload serialization, and wrong-task regression together
Tested: cargo fmt --all --check; cargo clippy --workspace --all-targets -- -D warnings; cargo test --workspace; architect review APPROVE
Not-tested: External orchestrators that have not yet started populating the optional task_receipt field
This commit is contained in:
parent
3b806702e7
commit
f309ff8642
@ -513,3 +513,5 @@ Model name prefix now wins unconditionally over env-var presence. Regression tes
|
||||
69. **Lane stop summaries have no minimum quality floor** — **done (verified 2026-04-12):** completed lane persistence in `rust/crates/tools/src/lib.rs` now normalizes vague/control-only stop summaries into a contextual fallback that includes the lane target and status, while preserving structured metadata about whether the quality floor fired (`qualityFloorApplied`, `rawSummary`, `reasons`, `wordCount`). Regression coverage locks both the pass-through path for good summaries and the fallback path for mushy summaries like `commit push everyting, keep sweeping $ralph`. **Original filing below.**
|
||||
|
||||
70. **Install-source ambiguity misleads real users** — **done (verified 2026-04-12):** repo-local Rust guidance now makes the source of truth explicit in `claw doctor` and `claw --help`, naming `ultraworkers/claw-code` as the canonical repo and warning that `cargo install claw-code` installs a deprecated stub rather than the `claw` binary. Regression coverage locks both the new doctor JSON check and the help-text warning. **Original filing below.**
|
||||
|
||||
71. **Wrong-task prompt receipt is not detected before execution** — **done (verified 2026-04-12):** worker boot prompt dispatch now accepts an optional structured `task_receipt` (`repo`, `task_kind`, `source_surface`, `expected_artifacts`, `objective_preview`) and treats mismatched visible prompt context as a `WrongTask` prompt-delivery failure before execution continues. The prompt-delivery payload now records `observed_prompt_preview` plus the expected receipt, and regression coverage locks both the existing shell/wrong-target paths and the new KakaoTalk-style wrong-task mismatch case. **Original filing below.**
|
||||
|
||||
@ -92,6 +92,7 @@ pub enum WorkerTrustResolution {
|
||||
/// Classification of how a dispatched worker prompt was observed to go astray.
pub enum WorkerPromptTarget {
    /// The prompt was visible on screen together with a shell error.
    Shell,
    /// The prompt was visible, but the observed shell working directory did not
    /// match the worker's expected one.
    WrongTarget,
    /// A prompt was visible on screen, but the expected task receipt was not.
    WrongTask,
    /// Delivery failed before reaching the coding agent for an unclassified reason.
    Unknown,
}
|
||||
|
||||
@ -108,10 +109,24 @@ pub enum WorkerEventPayload {
|
||||
observed_target: WorkerPromptTarget,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
observed_cwd: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
observed_prompt_preview: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
task_receipt: Option<WorkerTaskReceipt>,
|
||||
recovery_armed: bool,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct WorkerTaskReceipt {
|
||||
pub repo: String,
|
||||
pub task_kind: String,
|
||||
pub source_surface: String,
|
||||
#[serde(default, skip_serializing_if = "Vec::is_empty")]
|
||||
pub expected_artifacts: Vec<String>,
|
||||
pub objective_preview: String,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
|
||||
pub struct WorkerEvent {
|
||||
pub seq: u64,
|
||||
@ -134,6 +149,7 @@ pub struct Worker {
|
||||
pub prompt_delivery_attempts: u32,
|
||||
pub prompt_in_flight: bool,
|
||||
pub last_prompt: Option<String>,
|
||||
pub expected_receipt: Option<WorkerTaskReceipt>,
|
||||
pub replay_prompt: Option<String>,
|
||||
pub last_error: Option<WorkerFailure>,
|
||||
pub created_at: u64,
|
||||
@ -182,6 +198,7 @@ impl WorkerRegistry {
|
||||
prompt_delivery_attempts: 0,
|
||||
prompt_in_flight: false,
|
||||
last_prompt: None,
|
||||
expected_receipt: None,
|
||||
replay_prompt: None,
|
||||
last_error: None,
|
||||
created_at: ts,
|
||||
@ -257,6 +274,7 @@ impl WorkerRegistry {
|
||||
&lowered,
|
||||
worker.last_prompt.as_deref(),
|
||||
&worker.cwd,
|
||||
worker.expected_receipt.as_ref(),
|
||||
)
|
||||
})
|
||||
.flatten()
|
||||
@ -272,6 +290,10 @@ impl WorkerRegistry {
|
||||
"worker prompt landed in the wrong target instead of {}: {}",
|
||||
worker.cwd, prompt_preview
|
||||
),
|
||||
WorkerPromptTarget::WrongTask => format!(
|
||||
"worker prompt receipt mismatched the expected task context for {}: {}",
|
||||
worker.cwd, prompt_preview
|
||||
),
|
||||
WorkerPromptTarget::Unknown => format!(
|
||||
"worker prompt delivery failed before reaching coding agent: {prompt_preview}"
|
||||
),
|
||||
@ -291,6 +313,8 @@ impl WorkerRegistry {
|
||||
prompt_preview: prompt_preview.clone(),
|
||||
observed_target: observation.target,
|
||||
observed_cwd: observation.observed_cwd.clone(),
|
||||
observed_prompt_preview: observation.observed_prompt_preview.clone(),
|
||||
task_receipt: worker.expected_receipt.clone(),
|
||||
recovery_armed: false,
|
||||
}),
|
||||
);
|
||||
@ -306,6 +330,8 @@ impl WorkerRegistry {
|
||||
prompt_preview,
|
||||
observed_target: observation.target,
|
||||
observed_cwd: observation.observed_cwd,
|
||||
observed_prompt_preview: observation.observed_prompt_preview,
|
||||
task_receipt: worker.expected_receipt.clone(),
|
||||
recovery_armed: true,
|
||||
}),
|
||||
);
|
||||
@ -374,7 +400,12 @@ impl WorkerRegistry {
|
||||
Ok(worker.clone())
|
||||
}
|
||||
|
||||
pub fn send_prompt(&self, worker_id: &str, prompt: Option<&str>) -> Result<Worker, String> {
|
||||
pub fn send_prompt(
|
||||
&self,
|
||||
worker_id: &str,
|
||||
prompt: Option<&str>,
|
||||
task_receipt: Option<WorkerTaskReceipt>,
|
||||
) -> Result<Worker, String> {
|
||||
let mut inner = self.inner.lock().expect("worker registry lock poisoned");
|
||||
let worker = inner
|
||||
.workers
|
||||
@ -398,6 +429,7 @@ impl WorkerRegistry {
|
||||
worker.prompt_delivery_attempts += 1;
|
||||
worker.prompt_in_flight = true;
|
||||
worker.last_prompt = Some(next_prompt.clone());
|
||||
worker.expected_receipt = task_receipt;
|
||||
worker.replay_prompt = None;
|
||||
worker.last_error = None;
|
||||
worker.status = WorkerStatus::Running;
|
||||
@ -548,6 +580,7 @@ fn prompt_misdelivery_is_relevant(worker: &Worker) -> bool {
|
||||
struct PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget,
|
||||
observed_cwd: Option<String>,
|
||||
observed_prompt_preview: Option<String>,
|
||||
}
|
||||
|
||||
fn push_event(
|
||||
@ -699,6 +732,7 @@ fn detect_prompt_misdelivery(
|
||||
lowered: &str,
|
||||
prompt: Option<&str>,
|
||||
expected_cwd: &str,
|
||||
expected_receipt: Option<&WorkerTaskReceipt>,
|
||||
) -> Option<PromptDeliveryObservation> {
|
||||
let Some(prompt) = prompt else {
|
||||
return None;
|
||||
@ -713,12 +747,30 @@ fn detect_prompt_misdelivery(
|
||||
return None;
|
||||
}
|
||||
let prompt_visible = lowered.contains(&prompt_snippet);
|
||||
let observed_prompt_preview = detect_prompt_echo(screen_text);
|
||||
|
||||
if let Some(receipt) = expected_receipt {
|
||||
let receipt_visible = task_receipt_visible(lowered, receipt);
|
||||
let mismatched_prompt_visible = observed_prompt_preview
|
||||
.as_deref()
|
||||
.map(str::to_ascii_lowercase)
|
||||
.is_some_and(|preview| !preview.contains(&prompt_snippet));
|
||||
|
||||
if (prompt_visible || mismatched_prompt_visible) && !receipt_visible {
|
||||
return Some(PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget::WrongTask,
|
||||
observed_cwd: detect_observed_shell_cwd(screen_text),
|
||||
observed_prompt_preview,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(observed_cwd) = detect_observed_shell_cwd(screen_text) {
|
||||
if prompt_visible && !cwd_matches_observed_target(expected_cwd, &observed_cwd) {
|
||||
return Some(PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget::WrongTarget,
|
||||
observed_cwd: Some(observed_cwd),
|
||||
observed_prompt_preview,
|
||||
});
|
||||
}
|
||||
}
|
||||
@ -736,6 +788,7 @@ fn detect_prompt_misdelivery(
|
||||
(shell_error && prompt_visible).then_some(PromptDeliveryObservation {
|
||||
target: WorkerPromptTarget::Shell,
|
||||
observed_cwd: None,
|
||||
observed_prompt_preview,
|
||||
})
|
||||
}
|
||||
|
||||
@ -748,10 +801,38 @@ fn prompt_preview(prompt: &str) -> String {
|
||||
format!("{}…", preview.trim_end())
|
||||
}
|
||||
|
||||
/// Returns the text of the first non-empty prompt echo found in `screen_text`.
///
/// A prompt echo is a line whose first non-whitespace character is `›`. The
/// marker and surrounding whitespace are removed from the returned text. Marker
/// lines with nothing after the marker are skipped, and `None` is returned when
/// no line qualifies.
fn detect_prompt_echo(screen_text: &str) -> Option<String> {
    for line in screen_text.lines() {
        let Some(after_marker) = line.trim_start().strip_prefix('›') else {
            continue;
        };
        let echoed = after_marker.trim();
        if !echoed.is_empty() {
            return Some(echoed.to_owned());
        }
    }
    None
}
|
||||
|
||||
fn task_receipt_visible(lowered_screen_text: &str, receipt: &WorkerTaskReceipt) -> bool {
|
||||
let expected_tokens = [
|
||||
receipt.repo.to_ascii_lowercase(),
|
||||
receipt.task_kind.to_ascii_lowercase(),
|
||||
receipt.source_surface.to_ascii_lowercase(),
|
||||
receipt.objective_preview.to_ascii_lowercase(),
|
||||
];
|
||||
|
||||
expected_tokens
|
||||
.iter()
|
||||
.all(|token| lowered_screen_text.contains(token))
|
||||
&& receipt
|
||||
.expected_artifacts
|
||||
.iter()
|
||||
.all(|artifact| lowered_screen_text.contains(&artifact.to_ascii_lowercase()))
|
||||
}
|
||||
|
||||
fn prompt_misdelivery_detail(observation: &PromptDeliveryObservation) -> &'static str {
|
||||
match observation.target {
|
||||
WorkerPromptTarget::Shell => "shell misdelivery detected",
|
||||
WorkerPromptTarget::WrongTarget => "prompt landed in wrong target",
|
||||
WorkerPromptTarget::WrongTask => "prompt receipt mismatched expected task context",
|
||||
WorkerPromptTarget::Unknown => "prompt delivery failure detected",
|
||||
}
|
||||
}
|
||||
@ -865,7 +946,7 @@ mod tests {
|
||||
WorkerFailureKind::TrustGate
|
||||
);
|
||||
|
||||
let send_before_resolve = registry.send_prompt(&worker.worker_id, Some("ship it"));
|
||||
let send_before_resolve = registry.send_prompt(&worker.worker_id, Some("ship it"), None);
|
||||
assert!(send_before_resolve
|
||||
.expect_err("prompt delivery should be gated")
|
||||
.contains("not ready for prompt delivery"));
|
||||
@ -905,7 +986,7 @@ mod tests {
|
||||
.expect("ready observe should succeed");
|
||||
|
||||
let running = registry
|
||||
.send_prompt(&worker.worker_id, Some("Implement worker handshake"))
|
||||
.send_prompt(&worker.worker_id, Some("Implement worker handshake"), None)
|
||||
.expect("prompt send should succeed");
|
||||
assert_eq!(running.status, WorkerStatus::Running);
|
||||
assert_eq!(running.prompt_delivery_attempts, 1);
|
||||
@ -941,6 +1022,8 @@ mod tests {
|
||||
prompt_preview: "Implement worker handshake".to_string(),
|
||||
observed_target: WorkerPromptTarget::Shell,
|
||||
observed_cwd: None,
|
||||
observed_prompt_preview: None,
|
||||
task_receipt: None,
|
||||
recovery_armed: false,
|
||||
})
|
||||
);
|
||||
@ -956,12 +1039,14 @@ mod tests {
|
||||
prompt_preview: "Implement worker handshake".to_string(),
|
||||
observed_target: WorkerPromptTarget::Shell,
|
||||
observed_cwd: None,
|
||||
observed_prompt_preview: None,
|
||||
task_receipt: None,
|
||||
recovery_armed: true,
|
||||
})
|
||||
);
|
||||
|
||||
let replayed = registry
|
||||
.send_prompt(&worker.worker_id, None)
|
||||
.send_prompt(&worker.worker_id, None, None)
|
||||
.expect("replay send should succeed");
|
||||
assert_eq!(replayed.status, WorkerStatus::Running);
|
||||
assert!(replayed.replay_prompt.is_none());
|
||||
@ -976,7 +1061,11 @@ mod tests {
|
||||
.observe(&worker.worker_id, "Ready for input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
registry
|
||||
.send_prompt(&worker.worker_id, Some("Run the worker bootstrap tests"))
|
||||
.send_prompt(
|
||||
&worker.worker_id,
|
||||
Some("Run the worker bootstrap tests"),
|
||||
None,
|
||||
)
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
let recovered = registry
|
||||
@ -1007,6 +1096,8 @@ mod tests {
|
||||
prompt_preview: "Run the worker bootstrap tests".to_string(),
|
||||
observed_target: WorkerPromptTarget::WrongTarget,
|
||||
observed_cwd: Some("/tmp/repo-target-b".to_string()),
|
||||
observed_prompt_preview: None,
|
||||
task_receipt: None,
|
||||
recovery_armed: false,
|
||||
})
|
||||
);
|
||||
@ -1049,6 +1140,75 @@ mod tests {
|
||||
assert!(ready.last_error.is_none());
|
||||
}
|
||||
|
||||
#[test]
fn wrong_task_receipt_mismatch_is_detected_before_execution_continues() {
    // Receipt describing the repo-scoped task the lane is supposed to execute.
    let expected_receipt = WorkerTaskReceipt {
        repo: "claw-code".to_string(),
        task_kind: "repo_code".to_string(),
        source_surface: "omx_team".to_string(),
        expected_artifacts: vec!["patch".to_string(), "tests".to_string()],
        objective_preview: "Implement worker handshake".to_string(),
    };

    let registry = WorkerRegistry::new();
    let worker = registry.create("/tmp/repo-task", &[], true);
    registry
        .observe(&worker.worker_id, "Ready for input\n>")
        .expect("ready observe should succeed");
    registry
        .send_prompt(
            &worker.worker_id,
            Some("Implement worker handshake"),
            Some(expected_receipt.clone()),
        )
        .expect("prompt send should succeed");

    // The screen echoes an unrelated prompt instead of the one that was sent.
    let recovered = registry
        .observe(
            &worker.worker_id,
            "› Explain this KakaoTalk screenshot for a friend\nI can help analyze the screenshot…",
        )
        .expect("mismatch observe should succeed");

    // The worker is handed back for a replay and carries a prompt-delivery error.
    assert_eq!(recovered.status, WorkerStatus::ReadyForPrompt);
    let failure = recovered
        .last_error
        .as_ref()
        .expect("mismatch error should exist");
    assert_eq!(failure.kind, WorkerFailureKind::PromptDelivery);

    // The misdelivery event records both the observed prompt and the expected receipt.
    let misdelivery_event = recovered
        .events
        .iter()
        .find(|event| event.kind == WorkerEventKind::PromptMisdelivery)
        .expect("wrong-task event should exist");
    assert_eq!(misdelivery_event.status, WorkerStatus::Failed);
    assert_eq!(
        misdelivery_event.payload,
        Some(WorkerEventPayload::PromptDelivery {
            prompt_preview: "Implement worker handshake".to_string(),
            observed_target: WorkerPromptTarget::WrongTask,
            observed_cwd: None,
            observed_prompt_preview: Some(
                "Explain this KakaoTalk screenshot for a friend".to_string()
            ),
            task_receipt: Some(expected_receipt),
            recovery_armed: false,
        })
    );

    let replay_event = recovered
        .events
        .iter()
        .find(|event| event.kind == WorkerEventKind::PromptReplayArmed)
        .expect("replay event should exist");
    assert_eq!(replay_event.status, WorkerStatus::ReadyForPrompt);
}
|
||||
|
||||
#[test]
|
||||
fn restart_and_terminate_reset_or_finish_worker() {
|
||||
let registry = WorkerRegistry::new();
|
||||
@ -1057,7 +1217,7 @@ mod tests {
|
||||
.observe(&worker.worker_id, "Ready for input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
registry
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"))
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"), None)
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
let restarted = registry
|
||||
@ -1086,7 +1246,7 @@ mod tests {
|
||||
.observe(&worker.worker_id, "Ready for input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
registry
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"))
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"), None)
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
let failed = registry
|
||||
@ -1163,7 +1323,7 @@ mod tests {
|
||||
.observe(&worker.worker_id, "Ready for input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
registry
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"))
|
||||
.send_prompt(&worker.worker_id, Some("Run tests"), None)
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
let finished = registry
|
||||
|
||||
@ -304,7 +304,7 @@ fn worker_provider_failure_flows_through_recovery_to_policy() {
|
||||
.observe(&worker.worker_id, "Ready for your input\n>")
|
||||
.expect("ready observe should succeed");
|
||||
registry
|
||||
.send_prompt(&worker.worker_id, Some("Run analysis"))
|
||||
.send_prompt(&worker.worker_id, Some("Run analysis"), None)
|
||||
.expect("prompt send should succeed");
|
||||
|
||||
// Session completes with provider failure (finish="unknown", tokens=0)
|
||||
|
||||
@ -20,7 +20,7 @@ use runtime::{
|
||||
summary_compression::compress_summary_text,
|
||||
task_registry::TaskRegistry,
|
||||
team_cron_registry::{CronRegistry, TeamRegistry},
|
||||
worker_boot::{WorkerReadySnapshot, WorkerRegistry},
|
||||
worker_boot::{WorkerReadySnapshot, WorkerRegistry, WorkerTaskReceipt},
|
||||
write_file, ApiClient, ApiRequest, AssistantEvent, BashCommandInput, BashCommandOutput,
|
||||
BranchFreshness, ConfigLoader, ContentBlock, ConversationMessage, ConversationRuntime,
|
||||
GrepSearchInput, LaneCommitProvenance, LaneEvent, LaneEventBlocker, LaneEventName,
|
||||
@ -930,7 +930,22 @@ pub fn mvp_tool_specs() -> Vec<ToolSpec> {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"worker_id": { "type": "string" },
|
||||
"prompt": { "type": "string" }
|
||||
"prompt": { "type": "string" },
|
||||
"task_receipt": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"repo": { "type": "string" },
|
||||
"task_kind": { "type": "string" },
|
||||
"source_surface": { "type": "string" },
|
||||
"expected_artifacts": {
|
||||
"type": "array",
|
||||
"items": { "type": "string" }
|
||||
},
|
||||
"objective_preview": { "type": "string" }
|
||||
},
|
||||
"required": ["repo", "task_kind", "source_surface", "objective_preview"],
|
||||
"additionalProperties": false
|
||||
}
|
||||
},
|
||||
"required": ["worker_id"],
|
||||
"additionalProperties": false
|
||||
@ -1522,7 +1537,11 @@ fn run_worker_await_ready(input: WorkerIdInput) -> Result<String, String> {
|
||||
|
||||
#[allow(clippy::needless_pass_by_value)]
|
||||
fn run_worker_send_prompt(input: WorkerSendPromptInput) -> Result<String, String> {
|
||||
let worker = global_worker_registry().send_prompt(&input.worker_id, input.prompt.as_deref())?;
|
||||
let worker = global_worker_registry().send_prompt(
|
||||
&input.worker_id,
|
||||
input.prompt.as_deref(),
|
||||
input.task_receipt,
|
||||
)?;
|
||||
to_pretty_json(worker)
|
||||
}
|
||||
|
||||
@ -2439,6 +2458,8 @@ struct WorkerSendPromptInput {
|
||||
worker_id: String,
|
||||
#[serde(default)]
|
||||
prompt: Option<String>,
|
||||
#[serde(default)]
|
||||
task_receipt: Option<WorkerTaskReceipt>,
|
||||
}
|
||||
|
||||
const fn default_auto_recover_prompt_misdelivery() -> bool {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user