feat: #143 phase 1 — claw status degrades gracefully on malformed config

Previously `claw status` hard-failed on any config parse error, emitting a bare error string and exiting 1. This took down the entire health surface for a single malformed MCP entry, even though workspace, git, model, permission, and sandbox state could all be reported independently. `claw doctor` already degraded gracefully on the exact same input. This commit matches `claw status` to that contract. Changes: - Add `StatusContext::config_load_error: Option<String>` to capture parse errors without aborting. - Rewrite `status_context()` to match on `ConfigLoader::load()`: on Err, fall back to default `SandboxConfig` for sandbox resolution and record the parse error, then continue populating workspace/git/memory fields. - JSON output gains top-level `status: "ok" | "degraded"` marker and a `config_load_error` string (null on clean runs). All other existing fields preserved for backward compat. - Text output prepends a "Config load error" block with Details + Hint when config failed to parse, then a "Status (degraded)" header on the main block. Clean runs show the usual "Status" header. - Doctor path updated to pass the config load error through StatusContext. Regression test `status_degrades_gracefully_on_malformed_mcp_config_143`: - Injects a .claw.json with one valid + one malformed mcpServers entry - Asserts status_context() returns Ok (not Err) - Asserts config_load_error names the malformed field path - Asserts workspace/sandbox fields still populated in JSON - Asserts top-level status is 'degraded' - Asserts clean config path still returns status: 'ok' Verified live on /Users/yeongyu/clawd (contains deliberately broken MCP entries): $ claw status --output-format json { "status": "degraded", "config_load_error": ".../mcpServers.missing-command: missing string field command", "model": "claude-opus-4-6", "workspace": {...}, "sandbox": {...}, ... } Phase 2 (typed error object joining #4.44 taxonomy) tracked separately. Full workspace test green except pre-existing resume_latest flake (unrelated). Closes ROADMAP #143 phase 1.
2026-06-10 11:13:33 +08:00 · 2026-04-21 18:37:42 +09:00 · 2026-04-21 18:37:42 +09:00 · e2a43fcd49
commit e2a43fcd49
parent fcd5b49428
1 changed files with 159 additions and 11 deletions
--- a/rust/crates/rusty-claude-cli/src/main.rs
+++ b/rust/crates/rusty-claude-cli/src/main.rs
@ -1649,6 +1649,9 @@ fn render_doctor_report() -> Result<DoctorReport, Box<dyn std::error::Error>> {
        git_branch,
        git_summary,
        sandbox_status: resolve_sandbox_status(sandbox_config.sandbox(), &cwd),
+        // Doctor path has its own config check; StatusContext here is only
+        // fed into health renderers that don't read config_load_error.
+        config_load_error: config.as_ref().err().map(ToString::to_string),
    };
    Ok(DoctorReport {
        checks: vec![
@ -2482,6 +2485,13 @@ struct StatusContext {
    git_branch: Option<String>,
    git_summary: GitWorkspaceSummary,
    sandbox_status: runtime::SandboxStatus,
+    /// #143: when `.claw.json` (or another loaded config file) fails to parse,
+    /// we capture the parse error here and still populate every field that
+    /// doesn't depend on runtime config (workspace, git, sandbox defaults,
+    /// discovery counts). Top-level JSON output then reports
+    /// `status: "degraded"` so claws can distinguish "status ran but config
+    /// is broken" from "status ran cleanly".
+    config_load_error: Option<String>,
 }

 #[derive(Debug, Clone, Copy)]
@ -5119,8 +5129,16 @@ fn status_json_value(
    permission_mode: &str,
    context: &StatusContext,
 ) -> serde_json::Value {
+    // #143: top-level `status` marker so claws can distinguish
+    // a clean run from a degraded run (config parse failed but other fields
+    // are still populated). `config_load_error` carries the parse-error string
+    // when present; it's a string rather than a typed object in Phase 1 and
+    // will join the typed-error taxonomy in Phase 2 (ROADMAP §4.44).
+    let degraded = context.config_load_error.is_some();
    json!({
        "kind": "status",
+        "status": if degraded { "degraded" } else { "ok" },
+        "config_load_error": context.config_load_error,
        "model": model,
        "permission_mode": permission_mode,
        "usage": {
@ -5175,22 +5193,43 @@ fn status_context(
    let cwd = env::current_dir()?;
    let loader = ConfigLoader::default_for(&cwd);
    let discovered_config_files = loader.discover().len();
-    let runtime_config = loader.load()?;
+    // #143: degrade gracefully on config parse failure rather than hard-fail.
+    // `claw doctor` already does this; `claw status` now matches that contract
+    // so that one malformed `mcpServers.*` entry doesn't take down the whole
+    // health surface (workspace, git, model, permission, sandbox can still be
+    // reported independently).
+    let (loaded_config_files, sandbox_status, config_load_error) = match loader.load() {
+        Ok(runtime_config) => (
+            runtime_config.loaded_entries().len(),
+            resolve_sandbox_status(runtime_config.sandbox(), &cwd),
+            None,
+        ),
+        Err(err) => (
+            0,
+            // Fall back to defaults for sandbox resolution so claws still see
+            // a populated sandbox section instead of a missing field. Defaults
+            // produce the same output as a runtime config with no sandbox
+            // overrides, which is the right degraded-mode shape: we cannot
+            // report what the user *intended*, only what is actually in effect.
+            resolve_sandbox_status(&runtime::SandboxConfig::default(), &cwd),
+            Some(err.to_string()),
+        ),
+    };
    let project_context = ProjectContext::discover_with_git(&cwd, DEFAULT_DATE)?;
    let (project_root, git_branch) =
        parse_git_status_metadata(project_context.git_status.as_deref());
    let git_summary = parse_git_workspace_summary(project_context.git_status.as_deref());
-    let sandbox_status = resolve_sandbox_status(runtime_config.sandbox(), &cwd);
    Ok(StatusContext {
        cwd,
        session_path: session_path.map(Path::to_path_buf),
-        loaded_config_files: runtime_config.loaded_entries().len(),
+        loaded_config_files,
        discovered_config_files,
        memory_file_count: project_context.instruction_files.len(),
        project_root,
        git_branch,
        git_summary,
        sandbox_status,
+        config_load_error,
    })
 }

@ -5200,9 +5239,24 @@ fn format_status_report(
    permission_mode: &str,
    context: &StatusContext,
 ) -> String {
-    [
+    // #143: if config failed to parse, surface a degraded banner at the top
+    // of the text report so humans see the parse error before the body, while
+    // the body below still reports everything that could be resolved without
+    // config (workspace, git, sandbox defaults, etc.).
+    let status_line = if context.config_load_error.is_some() {
+        "Status (degraded)"
+    } else {
+        "Status"
+    };
+    let mut blocks: Vec<String> = Vec::new();
+    if let Some(err) = context.config_load_error.as_deref() {
+        blocks.push(format!(
+            "Config load error\n  Status           fail\n  Summary          runtime config failed to load; reporting partial status\n  Details          {err}\n  Hint             `claw doctor` classifies config parse errors; fix the listed field and rerun"
+        ));
+    }
+    blocks.extend([
        format!(
-            "Status
+            "{status_line}
  Model            {model}
  Permission mode  {permission_mode}
  Messages         {}
@ -5255,12 +5309,8 @@ fn format_status_report(
            context.memory_file_count,
        ),
        format_sandbox_report(&context.sandbox_status),
-    ]
-    .join(
-        "
-
-",
-    )
+    ]);
+    blocks.join("\n\n")
 }

 fn format_sandbox_report(status: &runtime::SandboxStatus) -> String {
@ -9623,6 +9673,103 @@ mod tests {
        }
    }

+    #[test]
+    fn status_degrades_gracefully_on_malformed_mcp_config_143() {
+        // #143: previously `claw status` hard-failed on any config parse error,
+        // taking down the entire health surface for one malformed MCP entry.
+        // `claw doctor` already degrades gracefully; this test locks `status`
+        // to the same contract.
+        let _guard = env_lock();
+        let root = temp_dir();
+        let cwd = root.join("project-with-malformed-mcp");
+        std::fs::create_dir_all(&cwd).expect("project dir should exist");
+        // One valid server + one malformed entry missing `command`.
+        std::fs::write(
+            cwd.join(".claw.json"),
+            r#"{
+  "mcpServers": {
+    "everything": {"command": "npx", "args": ["-y", "@modelcontextprotocol/server-everything"]},
+    "missing-command": {"args": ["arg-only-no-command"]}
+  }
+}
+"#,
+        )
+        .expect("write malformed .claw.json");
+
+        let context = with_current_dir(&cwd, || {
+            super::status_context(None).expect("status_context should not hard-fail on config parse errors (#143)")
+        });
+
+        // Phase 1 contract: config_load_error is populated with the parse error.
+        let err = context
+            .config_load_error
+            .as_ref()
+            .expect("config_load_error should be Some when config parse fails");
+        assert!(
+            err.contains("mcpServers.missing-command"),
+            "config_load_error should name the malformed field path: {err}"
+        );
+        assert!(
+            err.contains("missing string field command"),
+            "config_load_error should carry the underlying parse error: {err}"
+        );
+
+        // Phase 1 contract: workspace/git/sandbox fields are still populated
+        // (independent of config parse). Sandbox falls back to defaults.
+        assert_eq!(context.cwd, cwd.canonicalize().unwrap_or(cwd.clone()));
+        assert_eq!(
+            context.loaded_config_files, 0,
+            "loaded_config_files should be 0 when config parse fails"
+        );
+        assert!(
+            context.discovered_config_files > 0,
+            "discovered_config_files should still count the file that failed to parse"
+        );
+
+        // JSON output contract: top-level `status: "degraded"` + config_load_error field.
+        let usage = super::StatusUsage {
+            message_count: 0,
+            turns: 0,
+            latest: runtime::TokenUsage::default(),
+            cumulative: runtime::TokenUsage::default(),
+            estimated_tokens: 0,
+        };
+        let json = super::status_json_value(Some("test-model"), usage, "workspace-write", &context);
+        assert_eq!(
+            json.get("status").and_then(|v| v.as_str()),
+            Some("degraded"),
+            "top-level status marker should be 'degraded' when config parse failed: {json}"
+        );
+        assert!(
+            json.get("config_load_error")
+                .and_then(|v| v.as_str())
+                .is_some_and(|s| s.contains("mcpServers.missing-command")),
+            "config_load_error should surface in JSON output: {json}"
+        );
+        // Independent fields still populated.
+        assert_eq!(
+            json.get("model").and_then(|v| v.as_str()),
+            Some("test-model")
+        );
+        assert!(json.get("workspace").is_some(), "workspace field still reported");
+        assert!(json.get("sandbox").is_some(), "sandbox field still reported");
+
+        // Clean path: no config error → status: "ok", config_load_error: null.
+        let clean_cwd = root.join("project-with-clean-config");
+        std::fs::create_dir_all(&clean_cwd).expect("clean project dir");
+        let clean_context = with_current_dir(&clean_cwd, || {
+            super::status_context(None).expect("clean status_context should succeed")
+        });
+        assert!(clean_context.config_load_error.is_none());
+        let clean_json =
+            super::status_json_value(Some("test-model"), usage, "workspace-write", &clean_context);
+        assert_eq!(
+            clean_json.get("status").and_then(|v| v.as_str()),
+            Some("ok"),
+            "clean run should report status: 'ok'"
+        );
+    }
+
    #[test]
    fn state_error_surfaces_actionable_worker_commands_139() {
        // #139: the error for missing `.claw/worker-state.json` must name
@ -10761,6 +10908,7 @@ mod tests {
                    conflicted_files: 0,
                },
                sandbox_status: runtime::SandboxStatus::default(),
+                config_load_error: None,
            },
        );
        assert!(status.contains("Status"));