// Source repository: gogadmin/claw-code
							use std::collections::BTreeMap;
use std::fs;
use std::io::Write;
use std::os::unix::fs::PermissionsExt;
use std::path::{Path, PathBuf};
use std::process::{Command, Output, Stdio};
use std::sync::atomic::{AtomicU64, Ordering};
use std::time::{SystemTime, UNIX_EPOCH};

use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX};
use serde_json::{json, Value};

/// Process-wide counter that `unique_temp_dir` increments on every call and
/// embeds in the directory name, so repeated calls within this process yield
/// distinct paths.
static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);

/// End-to-end parity check: runs the `claw` binary once per scripted scenario
/// against a `MockAnthropicService` spawned on a local tokio runtime, each in
/// its own throwaway workspace, then verifies both the CLI's JSON output and
/// the sequence of requests the mock service captured.
#[test]
#[allow(clippy::too_many_lines)]
fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() {
    // Keep the manifest both as an ordered list (for the alignment check
    // below) and as a name-keyed map (for per-scenario report lookups).
    let manifest_entries = load_scenario_manifest();
    let manifest = manifest_entries
        .iter()
        .cloned()
        .map(|entry| (entry.name.clone(), entry))
        .collect::<BTreeMap<_, _>>();
    let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build");
    let server = runtime
        .block_on(MockAnthropicService::spawn())
        .expect("mock service should start");
    let base_url = server.base_url();

    // One entry per scenario. The order matters: it must match the manifest
    // and it determines the order of the captured requests asserted below.
    let cases = [
        ScenarioCase {
            name: "streaming_text",
            permission_mode: "read-only",
            allowed_tools: None,
            stdin: None,
            prepare: prepare_noop,
            assert: assert_streaming_text,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "read_file_roundtrip",
            permission_mode: "read-only",
            allowed_tools: Some("read_file"),
            stdin: None,
            prepare: prepare_read_fixture,
            assert: assert_read_file_roundtrip,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "grep_chunk_assembly",
            permission_mode: "read-only",
            allowed_tools: Some("grep_search"),
            stdin: None,
            prepare: prepare_grep_fixture,
            assert: assert_grep_chunk_assembly,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "write_file_allowed",
            permission_mode: "workspace-write",
            allowed_tools: Some("write_file"),
            stdin: None,
            prepare: prepare_noop,
            assert: assert_write_file_allowed,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "write_file_denied",
            permission_mode: "read-only",
            allowed_tools: Some("write_file"),
            stdin: None,
            prepare: prepare_noop,
            assert: assert_write_file_denied,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "multi_tool_turn_roundtrip",
            permission_mode: "read-only",
            allowed_tools: Some("read_file,grep_search"),
            stdin: None,
            prepare: prepare_multi_tool_fixture,
            assert: assert_multi_tool_turn_roundtrip,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "bash_stdout_roundtrip",
            permission_mode: "danger-full-access",
            allowed_tools: Some("bash"),
            stdin: None,
            prepare: prepare_noop,
            assert: assert_bash_stdout_roundtrip,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "bash_permission_prompt_approved",
            permission_mode: "workspace-write",
            allowed_tools: Some("bash"),
            stdin: Some("y\n"),
            prepare: prepare_noop,
            assert: assert_bash_permission_prompt_approved,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "bash_permission_prompt_denied",
            permission_mode: "workspace-write",
            allowed_tools: Some("bash"),
            stdin: Some("n\n"),
            prepare: prepare_noop,
            assert: assert_bash_permission_prompt_denied,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "plugin_tool_roundtrip",
            permission_mode: "workspace-write",
            allowed_tools: None,
            stdin: None,
            prepare: prepare_plugin_fixture,
            assert: assert_plugin_tool_roundtrip,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "auto_compact_triggered",
            permission_mode: "read-only",
            allowed_tools: None,
            stdin: None,
            prepare: prepare_noop,
            assert: assert_auto_compact_triggered,
            extra_env: None,
            resume_session: None,
        },
        ScenarioCase {
            name: "token_cost_reporting",
            permission_mode: "read-only",
            allowed_tools: None,
            stdin: None,
            prepare: prepare_noop,
            assert: assert_token_cost_reporting,
            extra_env: None,
            resume_session: None,
        },
    ];

    // Guard against the manifest file and the table above drifting apart;
    // both the names and their order have to agree.
    let case_names = cases.iter().map(|case| case.name).collect::<Vec<_>>();
    let manifest_names = manifest_entries
        .iter()
        .map(|entry| entry.name.as_str())
        .collect::<Vec<_>>();
    assert_eq!(
        case_names, manifest_names,
        "manifest and harness cases must stay aligned"
    );

    let mut scenario_reports = Vec::new();

    for case in cases {
        // Every scenario gets a fresh workspace, populated by its `prepare`
        // hook before the CLI is launched.
        let workspace = HarnessWorkspace::new(unique_temp_dir(case.name));
        workspace.create().expect("workspace should exist");
        (case.prepare)(&workspace);

        let run = run_case(case, &workspace, &base_url);
        (case.assert)(&workspace, &run);

        let manifest_entry = manifest
            .get(case.name)
            .unwrap_or_else(|| panic!("missing manifest entry for {}", case.name));
        scenario_reports.push(build_scenario_report(
            case.name,
            manifest_entry,
            &run.response,
        ));

        // Only reached when the scenario's assertions passed; a panicking
        // scenario leaves its workspace on disk.
        fs::remove_dir_all(&workspace.root).expect("workspace cleanup should succeed");
    }

    // Cross-check what the mock service saw: every request must be a
    // streaming call to `/v1/messages`.
    let captured = runtime.block_on(server.captured_requests());
    assert_eq!(
        captured.len(),
        21,
        "twelve scenarios should produce twenty-one requests"
    );
    assert!(captured
        .iter()
        .all(|request| request.path == "/v1/messages"));
    assert!(captured.iter().all(|request| request.stream));

    // The expected sequence lists nine scenarios twice and three once
    // (21 requests), in the same order as the case table.
    let scenarios = captured
        .iter()
        .map(|request| request.scenario.as_str())
        .collect::<Vec<_>>();
    assert_eq!(
        scenarios,
        vec![
            "streaming_text",
            "read_file_roundtrip",
            "read_file_roundtrip",
            "grep_chunk_assembly",
            "grep_chunk_assembly",
            "write_file_allowed",
            "write_file_allowed",
            "write_file_denied",
            "write_file_denied",
            "multi_tool_turn_roundtrip",
            "multi_tool_turn_roundtrip",
            "bash_stdout_roundtrip",
            "bash_stdout_roundtrip",
            "bash_permission_prompt_approved",
            "bash_permission_prompt_approved",
            "bash_permission_prompt_denied",
            "bash_permission_prompt_denied",
            "plugin_tool_roundtrip",
            "plugin_tool_roundtrip",
            "auto_compact_triggered",
            "token_cost_reporting",
        ]
    );

    // Tally captured requests per scenario and back-fill each report's
    // `request_count`, which `build_scenario_report` initialises to 0.
    let mut request_counts = BTreeMap::new();
    for request in &captured {
        *request_counts
            .entry(request.scenario.as_str())
            .or_insert(0_usize) += 1;
    }
    for report in &mut scenario_reports {
        report.request_count = *request_counts
            .get(report.name.as_str())
            .unwrap_or_else(|| panic!("missing request count for {}", report.name));
    }

    // Emits the JSON report only when `MOCK_PARITY_REPORT_PATH` is set.
    maybe_write_report(&scenario_reports);
}

/// Static description of one parity scenario: how to invoke the CLI and which
/// hooks prepare the workspace beforehand and check the outcome afterwards.
#[derive(Clone, Copy)]
struct ScenarioCase {
    /// Scenario name; appended to `SCENARIO_PREFIX` to form the prompt and
    /// used as the key into the scenario manifest.
    name: &'static str,
    /// Value passed to `--permission-mode`.
    permission_mode: &'static str,
    /// Value passed to `--allowedTools`; the flag is omitted when `None`.
    allowed_tools: Option<&'static str>,
    /// Text written to the child's stdin; when `None` no stdin pipe is set up.
    stdin: Option<&'static str>,
    /// Hook run on the workspace before the CLI is launched.
    prepare: fn(&HarnessWorkspace),
    /// Hook run after the CLI exits to check the workspace and its output.
    assert: fn(&HarnessWorkspace, &ScenarioRun),
    /// Optional extra `(key, value)` environment variable for the child.
    extra_env: Option<(&'static str, &'static str)>,
    /// Session id passed to `--resume`; the flag is omitted when `None`.
    resume_session: Option<&'static str>,
}

/// Directory layout used for a single scenario run.
struct HarnessWorkspace {
    /// Top-level directory of the workspace.
    root: PathBuf,
    /// `<root>/config-home`.
    config_home: PathBuf,
    /// `<root>/home`.
    home: PathBuf,
}

impl HarnessWorkspace {
    /// Derives the workspace layout from `root` without touching the filesystem.
    fn new(root: PathBuf) -> Self {
        let config_home = root.join("config-home");
        let home = root.join("home");
        Self {
            root,
            config_home,
            home,
        }
    }

    /// Creates the root and both sub-directories (parents included), returning
    /// the first I/O error encountered.
    fn create(&self) -> std::io::Result<()> {
        for directory in [&self.root, &self.config_home, &self.home].iter() {
            fs::create_dir_all(directory)?;
        }
        Ok(())
    }
}

/// Outcome of one CLI invocation as produced by `run_case`.
struct ScenarioRun {
    /// JSON value extracted from stdout by `parse_json_output`.
    response: Value,
    /// Complete stdout of the child process, lossily decoded as UTF-8.
    stdout: String,
}

/// One entry of the scenario manifest (`mock_parity_scenarios.json`) as read
/// by `load_scenario_manifest`.
#[derive(Debug, Clone)]
struct ScenarioManifestEntry {
    /// The entry's `name` field; compared against the harness case names.
    name: String,
    /// The entry's `category` field, copied verbatim into the report.
    category: String,
    /// The entry's `description` field, copied verbatim into the report.
    description: String,
    /// The entry's `parity_refs` array of strings, copied verbatim into the report.
    parity_refs: Vec<String>,
}

/// Per-scenario summary that `maybe_write_report` serialises into the report.
#[derive(Debug)]
struct ScenarioReport {
    /// Scenario name.
    name: String,
    /// Copied from the manifest entry.
    category: String,
    /// Copied from the manifest entry.
    description: String,
    /// Copied from the manifest entry.
    parity_refs: Vec<String>,
    /// The `iterations` value from the CLI's JSON response.
    iterations: u64,
    /// Number of requests the mock service captured for this scenario; starts
    /// at 0 and is filled in after all scenarios have run.
    request_count: usize,
    /// Names of the entries in the response's `tool_uses` array.
    tool_uses: Vec<String>,
    /// Number of `tool_results` entries whose `is_error` is `true`.
    tool_error_count: usize,
    /// The response's `message` text.
    final_message: String,
}

/// Launches the `claw` binary for `case` inside `workspace`, pointed at the
/// mock service at `base_url`, and returns its stdout plus the parsed JSON
/// response.
///
/// The child's environment is cleared first, so it only sees the variables
/// set here (and `case.extra_env`, if any). Panics if the process cannot be
/// launched, exits unsuccessfully, or prints no parseable JSON response.
fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> ScenarioRun {
    let mut cli = Command::new(env!("CARGO_BIN_EXE_claw"));
    cli.current_dir(&workspace.root).env_clear();

    // Fixed environment shared by every scenario.
    cli.env("ANTHROPIC_API_KEY", "test-parity-key")
        .env("ANTHROPIC_BASE_URL", base_url)
        .env("CLAW_CONFIG_HOME", &workspace.config_home)
        .env("HOME", &workspace.home)
        .env("NO_COLOR", "1")
        .env("PATH", "/usr/bin:/bin");

    // Fixed flags first, then the per-scenario optional ones, then the prompt.
    cli.arg("--model")
        .arg("sonnet")
        .arg("--permission-mode")
        .arg(case.permission_mode)
        .arg("--output-format=json");
    if let Some(tools) = case.allowed_tools {
        cli.arg("--allowedTools").arg(tools);
    }
    if let Some((variable, value)) = case.extra_env {
        cli.env(variable, value);
    }
    if let Some(session) = case.resume_session {
        cli.arg("--resume").arg(session);
    }
    cli.arg(format!("{SCENARIO_PREFIX}{}", case.name));

    let output = match case.stdin {
        None => cli.output().expect("claw should launch"),
        Some(input) => {
            // Scripted input: pipe all three streams, feed stdin, then
            // collect the output once the child has finished.
            let mut child = cli
                .stdin(Stdio::piped())
                .stdout(Stdio::piped())
                .stderr(Stdio::piped())
                .spawn()
                .expect("claw should launch");
            let pipe = child.stdin.as_mut().expect("stdin should be piped");
            pipe.write_all(input.as_bytes())
                .expect("stdin should write");
            child.wait_with_output().expect("claw should finish")
        }
    };

    assert_success(&output);
    let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
    let response = parse_json_output(&stdout);
    ScenarioRun { response, stdout }
}

/// Writes `<root>/.claw/sessions/parity-auto-compact-seed.jsonl`, a session
/// file holding one `session_meta` record followed by six alternating
/// user/assistant messages.
#[allow(dead_code)]
fn prepare_auto_compact_fixture(workspace: &HarnessWorkspace) {
    const SESSION_ID: &str = "parity-auto-compact-seed";
    // Pre-seeded session with 6 messages so auto-compact can remove them.
    const SESSION_JSONL: &str = r#"{"type":"session_meta","version":3,"session_id":"parity-auto-compact-seed","created_at_ms":1743724800000,"updated_at_ms":1743724800000}
{"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step one of the parity scenario"}]}}
{"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step one"}]}}
{"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step two of the parity scenario"}]}}
{"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step two"}]}}
{"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step three of the parity scenario"}]}}
{"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step three"}]}}
"#;

    let sessions_dir = workspace.root.join(".claw").join("sessions");
    fs::create_dir_all(&sessions_dir).expect("sessions dir should exist");

    let session_file = sessions_dir.join(format!("{}.jsonl", SESSION_ID));
    fs::write(session_file, SESSION_JSONL).expect("pre-seeded session should write");
}

/// Fixture hook for scenarios that need nothing in place before the CLI runs.
fn prepare_noop(_workspace: &HarnessWorkspace) {}

/// Writes a one-line `fixture.txt` into the workspace root.
fn prepare_read_fixture(workspace: &HarnessWorkspace) {
    let fixture_path = workspace.root.join("fixture.txt");
    fs::write(fixture_path, "alpha parity line\n").expect("fixture should write");
}

/// Writes a three-line `fixture.txt` into the workspace root; two of the
/// lines contain the word "parity".
fn prepare_grep_fixture(workspace: &HarnessWorkspace) {
    const FIXTURE: &str = "alpha parity line\nbeta line\ngamma parity line\n";

    let fixture_path = workspace.root.join("fixture.txt");
    fs::write(fixture_path, FIXTURE).expect("grep fixture should write");
}

/// Writes the three-line `fixture.txt` used by the multi-tool scenario into
/// the workspace root.
fn prepare_multi_tool_fixture(workspace: &HarnessWorkspace) {
    const FIXTURE: &str = "alpha parity line\nbeta line\ngamma parity line\n";

    let fixture_path = workspace.root.join("fixture.txt");
    fs::write(fixture_path, FIXTURE).expect("multi tool fixture should write");
}

/// Installs a mock plugin under `<root>/external-plugins/parity-plugin` and
/// enables it through `<config_home>/settings.json`.
///
/// The plugin exposes one tool, `plugin_echo`, backed by an executable shell
/// script that prints a JSON object containing `$CLAWD_PLUGIN_ID`,
/// `$CLAWD_TOOL_NAME` and whatever it read from stdin.
fn prepare_plugin_fixture(workspace: &HarnessWorkspace) {
    const ECHO_SCRIPT: &str = "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n";
    const PLUGIN_MANIFEST: &str = r#"{
  "name": "parity-plugin",
  "version": "1.0.0",
  "description": "mock parity plugin",
  "tools": [
    {
      "name": "plugin_echo",
      "description": "Echo JSON input",
      "inputSchema": {
        "type": "object",
        "properties": {
          "message": { "type": "string" }
        },
        "required": ["message"],
        "additionalProperties": false
      },
      "command": "./tools/echo-json.sh",
      "requiredPermission": "workspace-write"
    }
  ]
}"#;

    let plugin_root = workspace
        .root
        .join("external-plugins")
        .join("parity-plugin");
    let tool_dir = plugin_root.join("tools");
    let manifest_dir = plugin_root.join(".claude-plugin");
    fs::create_dir_all(&tool_dir).expect("plugin tools dir");
    fs::create_dir_all(&manifest_dir).expect("plugin manifest dir");

    // Tool script: written first, then marked executable (0o755).
    let script_path = tool_dir.join("echo-json.sh");
    fs::write(&script_path, ECHO_SCRIPT).expect("plugin script should write");
    let script_metadata = fs::metadata(&script_path).expect("plugin script metadata");
    let mut script_permissions = script_metadata.permissions();
    script_permissions.set_mode(0o755);
    fs::set_permissions(&script_path, script_permissions)
        .expect("plugin script should be executable");

    let manifest_path = manifest_dir.join("plugin.json");
    fs::write(manifest_path, PLUGIN_MANIFEST).expect("plugin manifest should write");

    // Settings point at the directory that contains the plugin, not at the
    // plugin directory itself.
    let external_dir = plugin_root
        .parent()
        .expect("plugin parent")
        .display()
        .to_string();
    let settings = json!({
        "enabledPlugins": {
            "parity-plugin@external": true
        },
        "plugins": {
            "externalDirectories": [external_dir]
        }
    });
    let settings_path = workspace.config_home.join("settings.json");
    fs::write(settings_path, settings.to_string()).expect("plugin settings should write");
}

/// Expects the canned greeting after a single iteration, with empty
/// `tool_uses` and `tool_results` arrays.
fn assert_streaming_text(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(
        response["message"],
        Value::from("Mock streaming says hello from the parity harness.")
    );
    assert_eq!(response["iterations"], Value::from(1));
    assert_eq!(response["tool_uses"], Value::Array(vec![]));
    assert_eq!(response["tool_results"], Value::Array(vec![]));
}

/// Expects two iterations, a `read_file` call on `fixture.txt`, and the
/// fixture's content in both the final message and the tool output (which
/// must also mention the fixture's full path).
fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));

    let tool_use = &response["tool_uses"][0];
    assert_eq!(tool_use["name"], Value::from("read_file"));
    assert_eq!(tool_use["input"], Value::from(r#"{"path":"fixture.txt"}"#));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("alpha parity line"));

    let output = response["tool_results"][0]["output"]
        .as_str()
        .expect("tool output");
    let fixture_path = workspace.root.join("fixture.txt").display().to_string();
    assert!(output.contains(&fixture_path));
    assert!(output.contains("alpha parity line"));
}

/// Expects two iterations, a `grep_search` call whose input is exactly the
/// expected JSON string, a message reporting "2 occurrences", and a
/// non-error tool result.
fn assert_grep_chunk_assembly(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));

    let tool_use = &response["tool_uses"][0];
    assert_eq!(tool_use["name"], Value::from("grep_search"));
    assert_eq!(
        tool_use["input"],
        Value::from(r#"{"pattern":"parity","path":"fixture.txt","output_mode":"count"}"#)
    );

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("2 occurrences"));

    assert_eq!(response["tool_results"][0]["is_error"], Value::from(false));
}

/// Expects two iterations, a `write_file` call, a message naming
/// `generated/output.txt`, that file on disk with the expected content, and a
/// non-error tool result.
fn assert_write_file_allowed(workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));
    assert_eq!(response["tool_uses"][0]["name"], Value::from("write_file"));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("generated/output.txt"));

    let generated_path = workspace.root.join("generated").join("output.txt");
    let written = fs::read_to_string(&generated_path).expect("generated file should exist");
    assert_eq!(written, "created by mock service\n");

    assert_eq!(response["tool_results"][0]["is_error"], Value::from(false));
}

/// Expects two iterations, a `write_file` call that came back as an error
/// mentioning the missing workspace-write permission, a "denied as expected"
/// message, and no `generated/denied.txt` on disk.
fn assert_write_file_denied(workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));
    assert_eq!(response["tool_uses"][0]["name"], Value::from("write_file"));

    let tool_result = &response["tool_results"][0];
    let tool_output = tool_result["output"].as_str().expect("tool output");
    assert!(tool_output.contains("requires workspace-write permission"));
    assert_eq!(tool_result["is_error"], Value::from(true));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("denied as expected"));

    let denied_path = workspace.root.join("generated").join("denied.txt");
    assert!(!denied_path.exists());
}

/// Expects two iterations with exactly two tool uses (`read_file` then
/// `grep_search`), two tool results, and a final message that carries both
/// the fixture line and the occurrence count.
fn assert_multi_tool_turn_roundtrip(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));

    let uses = response["tool_uses"].as_array().expect("tool uses array");
    assert_eq!(uses.len(), 2, "expected two tool uses in a single turn");
    assert_eq!(uses[0]["name"], Value::from("read_file"));
    assert_eq!(uses[1]["name"], Value::from("grep_search"));

    let results = response["tool_results"]
        .as_array()
        .expect("tool results array");
    assert_eq!(
        results.len(),
        2,
        "expected two tool results in a single turn"
    );

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("alpha parity line"));
    assert!(message.contains("2 occurrences"));
}

/// Expects two iterations, a `bash` call whose JSON tool output has
/// `stdout == "alpha from bash"`, a non-error result, and that same text in
/// the final message.
fn assert_bash_stdout_roundtrip(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));
    assert_eq!(response["tool_uses"][0]["name"], Value::from("bash"));

    let tool_result = &response["tool_results"][0];
    let tool_output = tool_result["output"].as_str().expect("tool output");
    // The tool output is itself a JSON document.
    let bash_json: Value = serde_json::from_str(tool_output).expect("bash output json");
    assert_eq!(bash_json["stdout"], Value::from("alpha from bash"));
    assert_eq!(tool_result["is_error"], Value::from(false));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("alpha from bash"));
}

/// Expects the approval prompt on stdout, two iterations, a non-error tool
/// result whose JSON output has `stdout == "approved via prompt"`, and an
/// "approved and executed" message.
fn assert_bash_permission_prompt_approved(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let stdout = &run.stdout;
    assert!(stdout.contains("Permission approval required"));
    assert!(stdout.contains("Approve this tool call? [y/N]:"));

    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));

    let tool_result = &response["tool_results"][0];
    assert_eq!(tool_result["is_error"], Value::from(false));
    let tool_output = tool_result["output"].as_str().expect("tool output");
    // The tool output is itself a JSON document.
    let bash_json: Value = serde_json::from_str(tool_output).expect("bash output json");
    assert_eq!(bash_json["stdout"], Value::from("approved via prompt"));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("approved and executed"));
}

/// Expects the approval prompt on stdout, two iterations, an error tool
/// result saying the call was denied by the prompt, and a "denied as
/// expected" message.
fn assert_bash_permission_prompt_denied(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let stdout = &run.stdout;
    assert!(stdout.contains("Permission approval required"));
    assert!(stdout.contains("Approve this tool call? [y/N]:"));

    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));

    let tool_result = &response["tool_results"][0];
    let tool_output = tool_result["output"].as_str().expect("tool output");
    assert!(tool_output.contains("denied by user approval prompt"));
    assert_eq!(tool_result["is_error"], Value::from(true));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("denied as expected"));
}

/// Expects two iterations, a `plugin_echo` call, and a JSON tool output that
/// echoes the plugin id, the tool name and the input message; the message
/// must also appear in the final response.
fn assert_plugin_tool_roundtrip(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(2));
    assert_eq!(response["tool_uses"][0]["name"], Value::from("plugin_echo"));

    let tool_output = response["tool_results"][0]["output"]
        .as_str()
        .expect("tool output");
    // The plugin script prints a JSON document.
    let echoed: Value = serde_json::from_str(tool_output).expect("plugin output json");
    assert_eq!(echoed["plugin"], Value::from("parity-plugin@external"));
    assert_eq!(echoed["tool"], Value::from("plugin_echo"));
    assert_eq!(
        echoed["input"]["message"],
        Value::from("hello from plugin parity")
    );

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("hello from plugin parity"));
}

/// Format-parity check for the auto-compact scenario: one iteration, no tool
/// uses, the expected message, an `auto_compaction` key in the JSON output,
/// and `usage.input_tokens` of at least 50 000.
///
/// The trigger behaviour itself is covered by
/// `conversation::tests::auto_compacts_when_cumulative_input_threshold_is_crossed`.
fn assert_auto_compact_triggered(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(1));
    assert_eq!(response["tool_uses"], Value::Array(vec![]));

    let message = response["message"].as_str().expect("message text");
    assert!(
        message.contains("auto compact parity complete."),
        "expected auto compact message in response"
    );

    // Only the key's presence is required; its value may be null for
    // below-threshold sessions.
    let fields = response.as_object().expect("response object");
    assert!(
        fields.contains_key("auto_compaction"),
        "auto_compaction key must be present in JSON output"
    );

    // The mock service reports large token counts for this scenario.
    let input_tokens = response["usage"]["input_tokens"]
        .as_u64()
        .expect("input_tokens should be present");
    assert!(
        input_tokens >= 50_000,
        "input_tokens should reflect mock service value (got {input_tokens})"
    );
}

/// Expects one iteration, the expected message, non-zero input and output
/// token counts, and an `estimated_cost` string starting with `$`.
fn assert_token_cost_reporting(_workspace: &HarnessWorkspace, run: &ScenarioRun) {
    let response = &run.response;
    assert_eq!(response["iterations"], Value::from(1));

    let message = response["message"].as_str().expect("message text");
    assert!(message.contains("token cost reporting parity complete."));

    // A missing or non-integer count is treated the same as zero.
    let usage = &response["usage"];
    assert!(
        matches!(usage["input_tokens"].as_u64(), Some(count) if count > 0),
        "input_tokens should be non-zero"
    );
    assert!(
        matches!(usage["output_tokens"].as_u64(), Some(count) if count > 0),
        "output_tokens should be non-zero"
    );

    let dollar_prefixed = response["estimated_cost"]
        .as_str()
        .map_or(false, |cost| cost.starts_with('$'));
    assert!(
        dollar_prefixed,
        "estimated_cost should be a dollar-prefixed string"
    );
}

/// Extracts the CLI's JSON response from `stdout`, which may contain other
/// text before it.
///
/// If `{"auto_compaction"` occurs in the text, everything from its last
/// occurrence to the end is parsed as one JSON document. Otherwise the lines
/// are scanned from the bottom and the first one that is wrapped in braces
/// and parses as JSON wins. Panics when neither approach yields a value.
fn parse_json_output(stdout: &str) -> Value {
    if let Some(start) = stdout.rfind("{\"auto_compaction\"") {
        return match serde_json::from_str(&stdout[start..]) {
            Ok(response) => response,
            Err(error) => {
                panic!("failed to parse JSON response from stdout: {error}\n{stdout}")
            }
        };
    }

    for line in stdout.lines().rev() {
        let candidate = line.trim();
        let brace_wrapped = candidate.starts_with('{') && candidate.ends_with('}');
        if !brace_wrapped {
            continue;
        }
        // Brace-wrapped lines that fail to parse are skipped, not fatal.
        if let Ok(response) = serde_json::from_str(candidate) {
            return response;
        }
    }

    panic!("no JSON response line found in stdout:\n{stdout}")
}

/// Builds the report for one scenario from its manifest entry and the CLI's
/// JSON response.
///
/// `request_count` is left at 0 here. Panics if the response lacks an
/// integer `iterations`, array `tool_uses` / `tool_results`, or a string
/// `message`.
fn build_scenario_report(
    name: &str,
    manifest_entry: &ScenarioManifestEntry,
    response: &Value,
) -> ScenarioReport {
    let iterations = response["iterations"]
        .as_u64()
        .expect("iterations should exist");

    // Tool uses without a string `name` are skipped.
    let tool_uses: Vec<String> = response["tool_uses"]
        .as_array()
        .expect("tool uses array")
        .iter()
        .filter_map(|tool_use| tool_use["name"].as_str())
        .map(str::to_owned)
        .collect();

    // Only results explicitly flagged `is_error: true` count as errors.
    let tool_error_count = response["tool_results"]
        .as_array()
        .expect("tool results array")
        .iter()
        .filter(|result| result["is_error"].as_bool() == Some(true))
        .count();

    let final_message = response["message"]
        .as_str()
        .expect("message text")
        .to_owned();

    ScenarioReport {
        name: name.to_owned(),
        category: manifest_entry.category.clone(),
        description: manifest_entry.description.clone(),
        parity_refs: manifest_entry.parity_refs.clone(),
        iterations,
        request_count: 0,
        tool_uses,
        tool_error_count,
        final_message,
    }
}

fn maybe_write_report(reports: &[ScenarioReport]) {
    let Some(path) = std::env::var_os("MOCK_PARITY_REPORT_PATH") else {
        return;
    };

    let payload = json!({
        "scenario_count": reports.len(),
        "request_count": reports.iter().map(|report| report.request_count).sum::<usize>(),
        "scenarios": reports.iter().map(scenario_report_json).collect::<Vec<_>>(),
    });
    fs::write(
        path,
        serde_json::to_vec_pretty(&payload).expect("report json should serialize"),
    )
    .expect("report should write");
}

/// Reads `mock_parity_scenarios.json` (two directories above this crate's
/// manifest directory) and converts each array element into a
/// `ScenarioManifestEntry`, preserving file order.
///
/// Panics if the file is missing, is not a JSON array, or an entry lacks a
/// string `name`, `category`, `description`, or an array of strings
/// `parity_refs`.
fn load_scenario_manifest() -> Vec<ScenarioManifestEntry> {
    /// Returns `entry[key]` as an owned string, panicking with `message` if
    /// it is not a JSON string.
    fn required_string(entry: &Value, key: &str, message: &str) -> String {
        entry[key].as_str().expect(message).to_string()
    }

    let manifest_path =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("../../mock_parity_scenarios.json");
    let raw = fs::read_to_string(&manifest_path).expect("scenario manifest should exist");
    let entries: Vec<Value> = serde_json::from_str(&raw).expect("scenario manifest should parse");

    let mut manifest = Vec::with_capacity(entries.len());
    for entry in &entries {
        let name = required_string(entry, "name", "scenario name should be a string");
        let category = required_string(entry, "category", "scenario category should be a string");
        let description = required_string(
            entry,
            "description",
            "scenario description should be a string",
        );
        let parity_refs = entry["parity_refs"]
            .as_array()
            .expect("parity refs should be an array")
            .iter()
            .map(|reference| {
                reference
                    .as_str()
                    .expect("parity ref should be a string")
                    .to_string()
            })
            .collect();

        manifest.push(ScenarioManifestEntry {
            name,
            category,
            description,
            parity_refs,
        });
    }
    manifest
}

fn scenario_report_json(report: &ScenarioReport) -> Value {
    json!({
        "name": report.name,
        "category": report.category,
        "description": report.description,
        "parity_refs": report.parity_refs,
        "iterations": report.iterations,
        "request_count": report.request_count,
        "tool_uses": report.tool_uses,
        "tool_error_count": report.tool_error_count,
        "final_message": report.final_message,
    })
}

/// Panics with the captured stdout and stderr unless the process exited
/// successfully.
fn assert_success(output: &Output) {
    if output.status.success() {
        return;
    }
    let stdout = String::from_utf8_lossy(&output.stdout);
    let stderr = String::from_utf8_lossy(&output.stderr);
    panic!("stdout:\n{}\n\nstderr:\n{}", stdout, stderr);
}

/// Returns a path under the system temp directory named
/// `claw-mock-parity-<label>-<pid>-<millis>-<counter>`.
///
/// The directory itself is not created. `counter` comes from `TEMP_COUNTER`,
/// which is bumped on every call.
fn unique_temp_dir(label: &str) -> PathBuf {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .expect("clock should be after epoch");
    let millis = since_epoch.as_millis();
    let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
    let pid = std::process::id();

    let dir_name = format!("claw-mock-parity-{label}-{pid}-{millis}-{counter}");
    std::env::temp_dir().join(dir_name)
}