| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876 |
- use std::collections::BTreeMap;
- use std::fs;
- use std::io::Write;
- use std::os::unix::fs::PermissionsExt;
- use std::path::{Path, PathBuf};
- use std::process::{Command, Output, Stdio};
- use std::sync::atomic::{AtomicU64, Ordering};
- use std::time::{SystemTime, UNIX_EPOCH};
- use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX};
- use serde_json::{json, Value};
/// Process-wide sequence number that `unique_temp_dir` mixes into each
/// directory name so repeated calls within one process get distinct paths.
static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
/// Drives the `claw` binary through every scripted scenario against a locally
/// spawned mock service, checks each scenario's JSON response, and then
/// verifies the exact sequence of requests the mock captured.
#[test]
#[allow(clippy::too_many_lines)]
fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() {
    /// Builds a scenario that sets no extra environment variable and resumes
    /// no session.
    fn scenario(
        name: &'static str,
        permission_mode: &'static str,
        allowed_tools: Option<&'static str>,
        stdin: Option<&'static str>,
        prepare: fn(&HarnessWorkspace),
        assert: fn(&HarnessWorkspace, &ScenarioRun),
    ) -> ScenarioCase {
        ScenarioCase {
            name,
            permission_mode,
            allowed_tools,
            stdin,
            prepare,
            assert,
            extra_env: None,
            resume_session: None,
        }
    }

    let manifest_entries = load_scenario_manifest();
    let manifest: BTreeMap<String, ScenarioManifestEntry> = manifest_entries
        .iter()
        .map(|entry| (entry.name.clone(), entry.clone()))
        .collect();

    let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build");
    let server = runtime
        .block_on(MockAnthropicService::spawn())
        .expect("mock service should start");
    let base_url = server.base_url();

    let cases = [
        scenario(
            "streaming_text",
            "read-only",
            None,
            None,
            prepare_noop,
            assert_streaming_text,
        ),
        scenario(
            "read_file_roundtrip",
            "read-only",
            Some("read_file"),
            None,
            prepare_read_fixture,
            assert_read_file_roundtrip,
        ),
        scenario(
            "grep_chunk_assembly",
            "read-only",
            Some("grep_search"),
            None,
            prepare_grep_fixture,
            assert_grep_chunk_assembly,
        ),
        scenario(
            "write_file_allowed",
            "workspace-write",
            Some("write_file"),
            None,
            prepare_noop,
            assert_write_file_allowed,
        ),
        scenario(
            "write_file_denied",
            "read-only",
            Some("write_file"),
            None,
            prepare_noop,
            assert_write_file_denied,
        ),
        scenario(
            "multi_tool_turn_roundtrip",
            "read-only",
            Some("read_file,grep_search"),
            None,
            prepare_multi_tool_fixture,
            assert_multi_tool_turn_roundtrip,
        ),
        scenario(
            "bash_stdout_roundtrip",
            "danger-full-access",
            Some("bash"),
            None,
            prepare_noop,
            assert_bash_stdout_roundtrip,
        ),
        scenario(
            "bash_permission_prompt_approved",
            "workspace-write",
            Some("bash"),
            Some("y\n"),
            prepare_noop,
            assert_bash_permission_prompt_approved,
        ),
        scenario(
            "bash_permission_prompt_denied",
            "workspace-write",
            Some("bash"),
            Some("n\n"),
            prepare_noop,
            assert_bash_permission_prompt_denied,
        ),
        scenario(
            "plugin_tool_roundtrip",
            "workspace-write",
            None,
            None,
            prepare_plugin_fixture,
            assert_plugin_tool_roundtrip,
        ),
        scenario(
            "auto_compact_triggered",
            "read-only",
            None,
            None,
            prepare_noop,
            assert_auto_compact_triggered,
        ),
        scenario(
            "token_cost_reporting",
            "read-only",
            None,
            None,
            prepare_noop,
            assert_token_cost_reporting,
        ),
    ];

    // The harness cases and the manifest must list the same scenarios in the same order.
    let case_names: Vec<&str> = cases.iter().map(|case| case.name).collect();
    let manifest_names: Vec<&str> = manifest_entries
        .iter()
        .map(|entry| entry.name.as_str())
        .collect();
    assert_eq!(
        case_names, manifest_names,
        "manifest and harness cases must stay aligned"
    );

    let mut scenario_reports = Vec::new();
    for case in cases {
        // Each scenario runs in its own freshly created workspace.
        let workspace = HarnessWorkspace::new(unique_temp_dir(case.name));
        workspace.create().expect("workspace should exist");
        (case.prepare)(&workspace);

        let run = run_case(case, &workspace, &base_url);
        (case.assert)(&workspace, &run);

        let Some(manifest_entry) = manifest.get(case.name) else {
            panic!("missing manifest entry for {}", case.name);
        };
        let report = build_scenario_report(case.name, manifest_entry, &run.response);
        scenario_reports.push(report);

        fs::remove_dir_all(&workspace.root).expect("workspace cleanup should succeed");
    }

    let captured = runtime.block_on(server.captured_requests());
    assert_eq!(
        captured.len(),
        21,
        "twelve scenarios should produce twenty-one requests"
    );
    assert!(captured
        .iter()
        .all(|request| request.path == "/v1/messages"));
    assert!(captured.iter().all(|request| request.stream));

    let scenarios: Vec<&str> = captured
        .iter()
        .map(|request| request.scenario.as_str())
        .collect();
    assert_eq!(
        scenarios,
        vec![
            "streaming_text",
            "read_file_roundtrip",
            "read_file_roundtrip",
            "grep_chunk_assembly",
            "grep_chunk_assembly",
            "write_file_allowed",
            "write_file_allowed",
            "write_file_denied",
            "write_file_denied",
            "multi_tool_turn_roundtrip",
            "multi_tool_turn_roundtrip",
            "bash_stdout_roundtrip",
            "bash_stdout_roundtrip",
            "bash_permission_prompt_approved",
            "bash_permission_prompt_approved",
            "bash_permission_prompt_denied",
            "bash_permission_prompt_denied",
            "plugin_tool_roundtrip",
            "plugin_tool_roundtrip",
            "auto_compact_triggered",
            "token_cost_reporting",
        ]
    );

    // Tally captured requests per scenario and copy the totals into the reports.
    let mut request_counts: BTreeMap<&str, usize> = BTreeMap::new();
    for request in &captured {
        *request_counts.entry(request.scenario.as_str()).or_default() += 1;
    }
    for report in &mut scenario_reports {
        let Some(&count) = request_counts.get(report.name.as_str()) else {
            panic!("missing request count for {}", report.name);
        };
        report.request_count = count;
    }

    maybe_write_report(&scenario_reports);
}
- #[derive(Clone, Copy)]
- struct ScenarioCase {
- name: &'static str,
- permission_mode: &'static str,
- allowed_tools: Option<&'static str>,
- stdin: Option<&'static str>,
- prepare: fn(&HarnessWorkspace),
- assert: fn(&HarnessWorkspace, &ScenarioRun),
- extra_env: Option<(&'static str, &'static str)>,
- resume_session: Option<&'static str>,
- }
/// Directory layout used by a single scenario run.
struct HarnessWorkspace {
    /// Top-level directory; also the child's working directory.
    root: PathBuf,
    /// `<root>/config-home`.
    config_home: PathBuf,
    /// `<root>/home`.
    home: PathBuf,
}

impl HarnessWorkspace {
    /// Derives the workspace paths from `root` without touching the filesystem.
    fn new(root: PathBuf) -> Self {
        let config_home = root.join("config-home");
        let home = root.join("home");
        Self {
            root,
            config_home,
            home,
        }
    }

    /// Creates the root, config-home and home directories, stopping at the
    /// first I/O error.
    fn create(&self) -> std::io::Result<()> {
        for dir in [&self.root, &self.config_home, &self.home] {
            fs::create_dir_all(dir)?;
        }
        Ok(())
    }
}
- struct ScenarioRun {
- response: Value,
- stdout: String,
- }
/// One entry of the scenario manifest file, with every field required.
#[derive(Debug, Clone)]
struct ScenarioManifestEntry {
    /// Scenario name; matched against the harness case names.
    name: String,
    /// Value of the entry's `category` key.
    category: String,
    /// Value of the entry's `description` key.
    description: String,
    /// Strings listed under the entry's `parity_refs` key.
    parity_refs: Vec<String>,
}
/// Per-scenario summary that is serialized into the optional parity report.
#[derive(Debug)]
struct ScenarioReport {
    /// Scenario name.
    name: String,
    /// Copied from the manifest entry.
    category: String,
    /// Copied from the manifest entry.
    description: String,
    /// Copied from the manifest entry.
    parity_refs: Vec<String>,
    /// The response's `iterations` value.
    iterations: u64,
    /// Number of requests the mock service captured for this scenario.
    request_count: usize,
    /// Names of the tools listed in the response's `tool_uses`.
    tool_uses: Vec<String>,
    /// How many `tool_results` entries have `is_error` set to true.
    tool_error_count: usize,
    /// The response's `message` text.
    final_message: String,
}
- fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> ScenarioRun {
- let mut command = Command::new(env!("CARGO_BIN_EXE_claw"));
- command
- .current_dir(&workspace.root)
- .env_clear()
- .env("ANTHROPIC_API_KEY", "test-parity-key")
- .env("ANTHROPIC_BASE_URL", base_url)
- .env("CLAW_CONFIG_HOME", &workspace.config_home)
- .env("HOME", &workspace.home)
- .env("NO_COLOR", "1")
- .env("PATH", "/usr/bin:/bin")
- .args([
- "--model",
- "sonnet",
- "--permission-mode",
- case.permission_mode,
- "--output-format=json",
- ]);
- if let Some(allowed_tools) = case.allowed_tools {
- command.args(["--allowedTools", allowed_tools]);
- }
- if let Some((key, value)) = case.extra_env {
- command.env(key, value);
- }
- if let Some(session_id) = case.resume_session {
- command.args(["--resume", session_id]);
- }
- let prompt = format!("{SCENARIO_PREFIX}{}", case.name);
- command.arg(prompt);
- let output = if let Some(stdin) = case.stdin {
- let mut child = command
- .stdin(Stdio::piped())
- .stdout(Stdio::piped())
- .stderr(Stdio::piped())
- .spawn()
- .expect("claw should launch");
- child
- .stdin
- .as_mut()
- .expect("stdin should be piped")
- .write_all(stdin.as_bytes())
- .expect("stdin should write");
- child.wait_with_output().expect("claw should finish")
- } else {
- command.output().expect("claw should launch")
- };
- assert_success(&output);
- let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
- ScenarioRun {
- response: parse_json_output(&stdout),
- stdout,
- }
- }
- #[allow(dead_code)]
- fn prepare_auto_compact_fixture(workspace: &HarnessWorkspace) {
- let sessions_dir = workspace.root.join(".claw").join("sessions");
- fs::create_dir_all(&sessions_dir).expect("sessions dir should exist");
- // Write a pre-seeded session with 6 messages so auto-compact can remove them
- let session_id = "parity-auto-compact-seed";
- let session_jsonl = r#"{"type":"session_meta","version":3,"session_id":"parity-auto-compact-seed","created_at_ms":1743724800000,"updated_at_ms":1743724800000}
- {"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step one of the parity scenario"}]}}
- {"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step one"}]}}
- {"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step two of the parity scenario"}]}}
- {"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step two"}]}}
- {"type":"message","message":{"role":"user","blocks":[{"type":"text","text":"step three of the parity scenario"}]}}
- {"type":"message","message":{"role":"assistant","blocks":[{"type":"text","text":"acknowledged step three"}]}}
- "#;
- fs::write(
- sessions_dir.join(format!("{session_id}.jsonl")),
- session_jsonl,
- )
- .expect("pre-seeded session should write");
- }
- fn prepare_noop(_: &HarnessWorkspace) {}
- fn prepare_read_fixture(workspace: &HarnessWorkspace) {
- fs::write(workspace.root.join("fixture.txt"), "alpha parity line\n")
- .expect("fixture should write");
- }
- fn prepare_grep_fixture(workspace: &HarnessWorkspace) {
- fs::write(
- workspace.root.join("fixture.txt"),
- "alpha parity line\nbeta line\ngamma parity line\n",
- )
- .expect("grep fixture should write");
- }
- fn prepare_multi_tool_fixture(workspace: &HarnessWorkspace) {
- fs::write(
- workspace.root.join("fixture.txt"),
- "alpha parity line\nbeta line\ngamma parity line\n",
- )
- .expect("multi tool fixture should write");
- }
- fn prepare_plugin_fixture(workspace: &HarnessWorkspace) {
- let plugin_root = workspace
- .root
- .join("external-plugins")
- .join("parity-plugin");
- let tool_dir = plugin_root.join("tools");
- let manifest_dir = plugin_root.join(".claude-plugin");
- fs::create_dir_all(&tool_dir).expect("plugin tools dir");
- fs::create_dir_all(&manifest_dir).expect("plugin manifest dir");
- let script_path = tool_dir.join("echo-json.sh");
- fs::write(
- &script_path,
- "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n",
- )
- .expect("plugin script should write");
- let mut permissions = fs::metadata(&script_path)
- .expect("plugin script metadata")
- .permissions();
- permissions.set_mode(0o755);
- fs::set_permissions(&script_path, permissions).expect("plugin script should be executable");
- fs::write(
- manifest_dir.join("plugin.json"),
- r#"{
- "name": "parity-plugin",
- "version": "1.0.0",
- "description": "mock parity plugin",
- "tools": [
- {
- "name": "plugin_echo",
- "description": "Echo JSON input",
- "inputSchema": {
- "type": "object",
- "properties": {
- "message": { "type": "string" }
- },
- "required": ["message"],
- "additionalProperties": false
- },
- "command": "./tools/echo-json.sh",
- "requiredPermission": "workspace-write"
- }
- ]
- }"#,
- )
- .expect("plugin manifest should write");
- fs::write(
- workspace.config_home.join("settings.json"),
- json!({
- "enabledPlugins": {
- "parity-plugin@external": true
- },
- "plugins": {
- "externalDirectories": [plugin_root.parent().expect("plugin parent").display().to_string()]
- }
- })
- .to_string(),
- )
- .expect("plugin settings should write");
- }
- fn assert_streaming_text(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(
- run.response["message"],
- Value::String("Mock streaming says hello from the parity harness.".to_string())
- );
- assert_eq!(run.response["iterations"], Value::from(1));
- assert_eq!(run.response["tool_uses"], Value::Array(Vec::new()));
- assert_eq!(run.response["tool_results"], Value::Array(Vec::new()));
- }
- fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_uses"][0]["name"],
- Value::String("read_file".to_string())
- );
- assert_eq!(
- run.response["tool_uses"][0]["input"],
- Value::String(r#"{"path":"fixture.txt"}"#.to_string())
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("alpha parity line"));
- let output = run.response["tool_results"][0]["output"]
- .as_str()
- .expect("tool output");
- assert!(output.contains(&workspace.root.join("fixture.txt").display().to_string()));
- assert!(output.contains("alpha parity line"));
- }
- fn assert_grep_chunk_assembly(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_uses"][0]["name"],
- Value::String("grep_search".to_string())
- );
- assert_eq!(
- run.response["tool_uses"][0]["input"],
- Value::String(
- r#"{"pattern":"parity","path":"fixture.txt","output_mode":"count"}"#.to_string()
- )
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("2 occurrences"));
- assert_eq!(
- run.response["tool_results"][0]["is_error"],
- Value::Bool(false)
- );
- }
- fn assert_write_file_allowed(workspace: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_uses"][0]["name"],
- Value::String("write_file".to_string())
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("generated/output.txt"));
- let generated = workspace.root.join("generated").join("output.txt");
- let contents = fs::read_to_string(&generated).expect("generated file should exist");
- assert_eq!(contents, "created by mock service\n");
- assert_eq!(
- run.response["tool_results"][0]["is_error"],
- Value::Bool(false)
- );
- }
- fn assert_write_file_denied(workspace: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_uses"][0]["name"],
- Value::String("write_file".to_string())
- );
- let tool_output = run.response["tool_results"][0]["output"]
- .as_str()
- .expect("tool output");
- assert!(tool_output.contains("requires workspace-write permission"));
- assert_eq!(
- run.response["tool_results"][0]["is_error"],
- Value::Bool(true)
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("denied as expected"));
- assert!(!workspace.root.join("generated").join("denied.txt").exists());
- }
- fn assert_multi_tool_turn_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- let tool_uses = run.response["tool_uses"]
- .as_array()
- .expect("tool uses array");
- assert_eq!(
- tool_uses.len(),
- 2,
- "expected two tool uses in a single turn"
- );
- assert_eq!(tool_uses[0]["name"], Value::String("read_file".to_string()));
- assert_eq!(
- tool_uses[1]["name"],
- Value::String("grep_search".to_string())
- );
- let tool_results = run.response["tool_results"]
- .as_array()
- .expect("tool results array");
- assert_eq!(
- tool_results.len(),
- 2,
- "expected two tool results in a single turn"
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("alpha parity line"));
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("2 occurrences"));
- }
- fn assert_bash_stdout_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_uses"][0]["name"],
- Value::String("bash".to_string())
- );
- let tool_output = run.response["tool_results"][0]["output"]
- .as_str()
- .expect("tool output");
- let parsed: Value = serde_json::from_str(tool_output).expect("bash output json");
- assert_eq!(
- parsed["stdout"],
- Value::String("alpha from bash".to_string())
- );
- assert_eq!(
- run.response["tool_results"][0]["is_error"],
- Value::Bool(false)
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("alpha from bash"));
- }
- fn assert_bash_permission_prompt_approved(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert!(run.stdout.contains("Permission approval required"));
- assert!(run.stdout.contains("Approve this tool call? [y/N]:"));
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_results"][0]["is_error"],
- Value::Bool(false)
- );
- let tool_output = run.response["tool_results"][0]["output"]
- .as_str()
- .expect("tool output");
- let parsed: Value = serde_json::from_str(tool_output).expect("bash output json");
- assert_eq!(
- parsed["stdout"],
- Value::String("approved via prompt".to_string())
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("approved and executed"));
- }
- fn assert_bash_permission_prompt_denied(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert!(run.stdout.contains("Permission approval required"));
- assert!(run.stdout.contains("Approve this tool call? [y/N]:"));
- assert_eq!(run.response["iterations"], Value::from(2));
- let tool_output = run.response["tool_results"][0]["output"]
- .as_str()
- .expect("tool output");
- assert!(tool_output.contains("denied by user approval prompt"));
- assert_eq!(
- run.response["tool_results"][0]["is_error"],
- Value::Bool(true)
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("denied as expected"));
- }
- fn assert_plugin_tool_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(2));
- assert_eq!(
- run.response["tool_uses"][0]["name"],
- Value::String("plugin_echo".to_string())
- );
- let tool_output = run.response["tool_results"][0]["output"]
- .as_str()
- .expect("tool output");
- let parsed: Value = serde_json::from_str(tool_output).expect("plugin output json");
- assert_eq!(
- parsed["plugin"],
- Value::String("parity-plugin@external".to_string())
- );
- assert_eq!(parsed["tool"], Value::String("plugin_echo".to_string()));
- assert_eq!(
- parsed["input"]["message"],
- Value::String("hello from plugin parity".to_string())
- );
- assert!(run.response["message"]
- .as_str()
- .expect("message text")
- .contains("hello from plugin parity"));
- }
- fn assert_auto_compact_triggered(_: &HarnessWorkspace, run: &ScenarioRun) {
- // Validates that the auto_compaction field is present in JSON output (format parity).
- // Trigger behavior is covered by conversation::tests::auto_compacts_when_cumulative_input_threshold_is_crossed.
- assert_eq!(run.response["iterations"], Value::from(1));
- assert_eq!(run.response["tool_uses"], Value::Array(Vec::new()));
- assert!(
- run.response["message"]
- .as_str()
- .expect("message text")
- .contains("auto compact parity complete."),
- "expected auto compact message in response"
- );
- // auto_compaction key must be present in JSON (may be null for below-threshold sessions)
- assert!(
- run.response.as_object().expect("response object").contains_key("auto_compaction"),
- "auto_compaction key must be present in JSON output"
- );
- // Verify input_tokens field reflects the large mock token counts
- let input_tokens = run.response["usage"]["input_tokens"]
- .as_u64()
- .expect("input_tokens should be present");
- assert!(
- input_tokens >= 50_000,
- "input_tokens should reflect mock service value (got {input_tokens})"
- );
- }
- fn assert_token_cost_reporting(_: &HarnessWorkspace, run: &ScenarioRun) {
- assert_eq!(run.response["iterations"], Value::from(1));
- assert!(
- run.response["message"]
- .as_str()
- .expect("message text")
- .contains("token cost reporting parity complete."),
- );
- let usage = &run.response["usage"];
- assert!(
- usage["input_tokens"].as_u64().unwrap_or(0) > 0,
- "input_tokens should be non-zero"
- );
- assert!(
- usage["output_tokens"].as_u64().unwrap_or(0) > 0,
- "output_tokens should be non-zero"
- );
- assert!(
- run.response["estimated_cost"]
- .as_str()
- .map(|cost| cost.starts_with('$'))
- .unwrap_or(false),
- "estimated_cost should be a dollar-prefixed string"
- );
- }
- fn parse_json_output(stdout: &str) -> Value {
- if let Some(index) = stdout.rfind("{\"auto_compaction\"") {
- return serde_json::from_str(&stdout[index..]).unwrap_or_else(|error| {
- panic!("failed to parse JSON response from stdout: {error}\n{stdout}")
- });
- }
- stdout
- .lines()
- .rev()
- .find_map(|line| {
- let trimmed = line.trim();
- if trimmed.starts_with('{') && trimmed.ends_with('}') {
- serde_json::from_str(trimmed).ok()
- } else {
- None
- }
- })
- .unwrap_or_else(|| panic!("no JSON response line found in stdout:\n{stdout}"))
- }
- fn build_scenario_report(
- name: &str,
- manifest_entry: &ScenarioManifestEntry,
- response: &Value,
- ) -> ScenarioReport {
- ScenarioReport {
- name: name.to_string(),
- category: manifest_entry.category.clone(),
- description: manifest_entry.description.clone(),
- parity_refs: manifest_entry.parity_refs.clone(),
- iterations: response["iterations"]
- .as_u64()
- .expect("iterations should exist"),
- request_count: 0,
- tool_uses: response["tool_uses"]
- .as_array()
- .expect("tool uses array")
- .iter()
- .filter_map(|value| value["name"].as_str().map(ToOwned::to_owned))
- .collect(),
- tool_error_count: response["tool_results"]
- .as_array()
- .expect("tool results array")
- .iter()
- .filter(|value| value["is_error"].as_bool().unwrap_or(false))
- .count(),
- final_message: response["message"]
- .as_str()
- .expect("message text")
- .to_string(),
- }
- }
- fn maybe_write_report(reports: &[ScenarioReport]) {
- let Some(path) = std::env::var_os("MOCK_PARITY_REPORT_PATH") else {
- return;
- };
- let payload = json!({
- "scenario_count": reports.len(),
- "request_count": reports.iter().map(|report| report.request_count).sum::<usize>(),
- "scenarios": reports.iter().map(scenario_report_json).collect::<Vec<_>>(),
- });
- fs::write(
- path,
- serde_json::to_vec_pretty(&payload).expect("report json should serialize"),
- )
- .expect("report should write");
- }
- fn load_scenario_manifest() -> Vec<ScenarioManifestEntry> {
- let manifest_path =
- Path::new(env!("CARGO_MANIFEST_DIR")).join("../../mock_parity_scenarios.json");
- let manifest = fs::read_to_string(&manifest_path).expect("scenario manifest should exist");
- serde_json::from_str::<Vec<Value>>(&manifest)
- .expect("scenario manifest should parse")
- .into_iter()
- .map(|entry| ScenarioManifestEntry {
- name: entry["name"]
- .as_str()
- .expect("scenario name should be a string")
- .to_string(),
- category: entry["category"]
- .as_str()
- .expect("scenario category should be a string")
- .to_string(),
- description: entry["description"]
- .as_str()
- .expect("scenario description should be a string")
- .to_string(),
- parity_refs: entry["parity_refs"]
- .as_array()
- .expect("parity refs should be an array")
- .iter()
- .map(|value| {
- value
- .as_str()
- .expect("parity ref should be a string")
- .to_string()
- })
- .collect(),
- })
- .collect()
- }
- fn scenario_report_json(report: &ScenarioReport) -> Value {
- json!({
- "name": report.name,
- "category": report.category,
- "description": report.description,
- "parity_refs": report.parity_refs,
- "iterations": report.iterations,
- "request_count": report.request_count,
- "tool_uses": report.tool_uses,
- "tool_error_count": report.tool_error_count,
- "final_message": report.final_message,
- })
- }
/// Panics unless the child process behind `output` exited successfully.
///
/// The panic message carries the exit status along with the captured stdout
/// and stderr (lossily decoded as UTF-8). The status matters when the child
/// dies from a signal or exits non-zero without printing anything, because
/// both streams are then empty.
fn assert_success(output: &Output) {
    assert!(
        output.status.success(),
        "process exited unsuccessfully ({})\n\nstdout:\n{}\n\nstderr:\n{}",
        output.status,
        String::from_utf8_lossy(&output.stdout),
        String::from_utf8_lossy(&output.stderr)
    );
}
- fn unique_temp_dir(label: &str) -> PathBuf {
- let millis = SystemTime::now()
- .duration_since(UNIX_EPOCH)
- .expect("clock should be after epoch")
- .as_millis();
- let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
- std::env::temp_dir().join(format!(
- "claw-mock-parity-{label}-{}-{millis}-{counter}",
- std::process::id()
- ))
- }
|