mock_parity_harness.rs 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751
  1. use std::collections::BTreeMap;
  2. use std::fs;
  3. use std::io::Write;
  4. use std::os::unix::fs::PermissionsExt;
  5. use std::path::{Path, PathBuf};
  6. use std::process::{Command, Output, Stdio};
  7. use std::sync::atomic::{AtomicU64, Ordering};
  8. use std::time::{SystemTime, UNIX_EPOCH};
  9. use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX};
  10. use serde_json::{json, Value};
  // Process-wide counter that `unique_temp_dir` increments on every call, so two
  // directories requested within the same millisecond still get distinct names.
  11. static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
  12. #[test]
  13. #[allow(clippy::too_many_lines)]
  14. fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() {
  15. let manifest_entries = load_scenario_manifest();
  16. let manifest = manifest_entries
  17. .iter()
  18. .cloned()
  19. .map(|entry| (entry.name.clone(), entry))
  20. .collect::<BTreeMap<_, _>>();
  21. let runtime = tokio::runtime::Runtime::new().expect("tokio runtime should build");
  22. let server = runtime
  23. .block_on(MockAnthropicService::spawn())
  24. .expect("mock service should start");
  25. let base_url = server.base_url();
  26. let cases = [
  27. ScenarioCase {
  28. name: "streaming_text",
  29. permission_mode: "read-only",
  30. allowed_tools: None,
  31. stdin: None,
  32. prepare: prepare_noop,
  33. assert: assert_streaming_text,
  34. },
  35. ScenarioCase {
  36. name: "read_file_roundtrip",
  37. permission_mode: "read-only",
  38. allowed_tools: Some("read_file"),
  39. stdin: None,
  40. prepare: prepare_read_fixture,
  41. assert: assert_read_file_roundtrip,
  42. },
  43. ScenarioCase {
  44. name: "grep_chunk_assembly",
  45. permission_mode: "read-only",
  46. allowed_tools: Some("grep_search"),
  47. stdin: None,
  48. prepare: prepare_grep_fixture,
  49. assert: assert_grep_chunk_assembly,
  50. },
  51. ScenarioCase {
  52. name: "write_file_allowed",
  53. permission_mode: "workspace-write",
  54. allowed_tools: Some("write_file"),
  55. stdin: None,
  56. prepare: prepare_noop,
  57. assert: assert_write_file_allowed,
  58. },
  59. ScenarioCase {
  60. name: "write_file_denied",
  61. permission_mode: "read-only",
  62. allowed_tools: Some("write_file"),
  63. stdin: None,
  64. prepare: prepare_noop,
  65. assert: assert_write_file_denied,
  66. },
  67. ScenarioCase {
  68. name: "multi_tool_turn_roundtrip",
  69. permission_mode: "read-only",
  70. allowed_tools: Some("read_file,grep_search"),
  71. stdin: None,
  72. prepare: prepare_multi_tool_fixture,
  73. assert: assert_multi_tool_turn_roundtrip,
  74. },
  75. ScenarioCase {
  76. name: "bash_stdout_roundtrip",
  77. permission_mode: "danger-full-access",
  78. allowed_tools: Some("bash"),
  79. stdin: None,
  80. prepare: prepare_noop,
  81. assert: assert_bash_stdout_roundtrip,
  82. },
  83. ScenarioCase {
  84. name: "bash_permission_prompt_approved",
  85. permission_mode: "workspace-write",
  86. allowed_tools: Some("bash"),
  87. stdin: Some("y\n"),
  88. prepare: prepare_noop,
  89. assert: assert_bash_permission_prompt_approved,
  90. },
  91. ScenarioCase {
  92. name: "bash_permission_prompt_denied",
  93. permission_mode: "workspace-write",
  94. allowed_tools: Some("bash"),
  95. stdin: Some("n\n"),
  96. prepare: prepare_noop,
  97. assert: assert_bash_permission_prompt_denied,
  98. },
  99. ScenarioCase {
  100. name: "plugin_tool_roundtrip",
  101. permission_mode: "workspace-write",
  102. allowed_tools: None,
  103. stdin: None,
  104. prepare: prepare_plugin_fixture,
  105. assert: assert_plugin_tool_roundtrip,
  106. },
  107. ];
  108. let case_names = cases.iter().map(|case| case.name).collect::<Vec<_>>();
  109. let manifest_names = manifest_entries
  110. .iter()
  111. .map(|entry| entry.name.as_str())
  112. .collect::<Vec<_>>();
  113. assert_eq!(
  114. case_names, manifest_names,
  115. "manifest and harness cases must stay aligned"
  116. );
  117. let mut scenario_reports = Vec::new();
  118. for case in cases {
  119. let workspace = HarnessWorkspace::new(unique_temp_dir(case.name));
  120. workspace.create().expect("workspace should exist");
  121. (case.prepare)(&workspace);
  122. let run = run_case(case, &workspace, &base_url);
  123. (case.assert)(&workspace, &run);
  124. let manifest_entry = manifest
  125. .get(case.name)
  126. .unwrap_or_else(|| panic!("missing manifest entry for {}", case.name));
  127. scenario_reports.push(build_scenario_report(
  128. case.name,
  129. manifest_entry,
  130. &run.response,
  131. ));
  132. fs::remove_dir_all(&workspace.root).expect("workspace cleanup should succeed");
  133. }
  134. let captured = runtime.block_on(server.captured_requests());
  135. assert_eq!(
  136. captured.len(),
  137. 19,
  138. "ten scenarios should produce nineteen requests"
  139. );
  140. assert!(captured
  141. .iter()
  142. .all(|request| request.path == "/v1/messages"));
  143. assert!(captured.iter().all(|request| request.stream));
  144. let scenarios = captured
  145. .iter()
  146. .map(|request| request.scenario.as_str())
  147. .collect::<Vec<_>>();
  148. assert_eq!(
  149. scenarios,
  150. vec![
  151. "streaming_text",
  152. "read_file_roundtrip",
  153. "read_file_roundtrip",
  154. "grep_chunk_assembly",
  155. "grep_chunk_assembly",
  156. "write_file_allowed",
  157. "write_file_allowed",
  158. "write_file_denied",
  159. "write_file_denied",
  160. "multi_tool_turn_roundtrip",
  161. "multi_tool_turn_roundtrip",
  162. "bash_stdout_roundtrip",
  163. "bash_stdout_roundtrip",
  164. "bash_permission_prompt_approved",
  165. "bash_permission_prompt_approved",
  166. "bash_permission_prompt_denied",
  167. "bash_permission_prompt_denied",
  168. "plugin_tool_roundtrip",
  169. "plugin_tool_roundtrip",
  170. ]
  171. );
  172. let mut request_counts = BTreeMap::new();
  173. for request in &captured {
  174. *request_counts
  175. .entry(request.scenario.as_str())
  176. .or_insert(0_usize) += 1;
  177. }
  178. for report in &mut scenario_reports {
  179. report.request_count = *request_counts
  180. .get(report.name.as_str())
  181. .unwrap_or_else(|| panic!("missing request count for {}", report.name));
  182. }
  183. maybe_write_report(&scenario_reports);
  184. }
  185. #[derive(Clone, Copy)]
  186. struct ScenarioCase {
  187. name: &'static str,
  188. permission_mode: &'static str,
  189. allowed_tools: Option<&'static str>,
  190. stdin: Option<&'static str>,
  191. prepare: fn(&HarnessWorkspace),
  192. assert: fn(&HarnessWorkspace, &ScenarioRun),
  193. }
  194. struct HarnessWorkspace {
  195. root: PathBuf,
  196. config_home: PathBuf,
  197. home: PathBuf,
  198. }
  199. impl HarnessWorkspace {
  200. fn new(root: PathBuf) -> Self {
  201. Self {
  202. config_home: root.join("config-home"),
  203. home: root.join("home"),
  204. root,
  205. }
  206. }
  207. fn create(&self) -> std::io::Result<()> {
  208. fs::create_dir_all(&self.root)?;
  209. fs::create_dir_all(&self.config_home)?;
  210. fs::create_dir_all(&self.home)?;
  211. Ok(())
  212. }
  213. }
  214. struct ScenarioRun {
  215. response: Value,
  216. stdout: String,
  217. }
  218. #[derive(Debug, Clone)]
  219. struct ScenarioManifestEntry {
  220. name: String,
  221. category: String,
  222. description: String,
  223. parity_refs: Vec<String>,
  224. }
  225. #[derive(Debug)]
  226. struct ScenarioReport {
  227. name: String,
  228. category: String,
  229. description: String,
  230. parity_refs: Vec<String>,
  231. iterations: u64,
  232. request_count: usize,
  233. tool_uses: Vec<String>,
  234. tool_error_count: usize,
  235. final_message: String,
  236. }
  237. fn run_case(case: ScenarioCase, workspace: &HarnessWorkspace, base_url: &str) -> ScenarioRun {
  238. let mut command = Command::new(env!("CARGO_BIN_EXE_claw"));
  239. command
  240. .current_dir(&workspace.root)
  241. .env_clear()
  242. .env("ANTHROPIC_API_KEY", "test-parity-key")
  243. .env("ANTHROPIC_BASE_URL", base_url)
  244. .env("CLAW_CONFIG_HOME", &workspace.config_home)
  245. .env("HOME", &workspace.home)
  246. .env("NO_COLOR", "1")
  247. .env("PATH", "/usr/bin:/bin")
  248. .args([
  249. "--model",
  250. "sonnet",
  251. "--permission-mode",
  252. case.permission_mode,
  253. "--output-format=json",
  254. ]);
  255. if let Some(allowed_tools) = case.allowed_tools {
  256. command.args(["--allowedTools", allowed_tools]);
  257. }
  258. let prompt = format!("{SCENARIO_PREFIX}{}", case.name);
  259. command.arg(prompt);
  260. let output = if let Some(stdin) = case.stdin {
  261. let mut child = command
  262. .stdin(Stdio::piped())
  263. .stdout(Stdio::piped())
  264. .stderr(Stdio::piped())
  265. .spawn()
  266. .expect("claw should launch");
  267. child
  268. .stdin
  269. .as_mut()
  270. .expect("stdin should be piped")
  271. .write_all(stdin.as_bytes())
  272. .expect("stdin should write");
  273. child.wait_with_output().expect("claw should finish")
  274. } else {
  275. command.output().expect("claw should launch")
  276. };
  277. assert_success(&output);
  278. let stdout = String::from_utf8_lossy(&output.stdout).into_owned();
  279. ScenarioRun {
  280. response: parse_json_output(&stdout),
  281. stdout,
  282. }
  283. }
  284. fn prepare_noop(_: &HarnessWorkspace) {}
  285. fn prepare_read_fixture(workspace: &HarnessWorkspace) {
  286. fs::write(workspace.root.join("fixture.txt"), "alpha parity line\n")
  287. .expect("fixture should write");
  288. }
  289. fn prepare_grep_fixture(workspace: &HarnessWorkspace) {
  290. fs::write(
  291. workspace.root.join("fixture.txt"),
  292. "alpha parity line\nbeta line\ngamma parity line\n",
  293. )
  294. .expect("grep fixture should write");
  295. }
  296. fn prepare_multi_tool_fixture(workspace: &HarnessWorkspace) {
  297. fs::write(
  298. workspace.root.join("fixture.txt"),
  299. "alpha parity line\nbeta line\ngamma parity line\n",
  300. )
  301. .expect("multi tool fixture should write");
  302. }
  // Installs a throwaway external plugin into the workspace and enables it.
  //
  // Layout created under `<root>/external-plugins/parity-plugin`:
  //   - `tools/echo-json.sh`: executable shell script that reads stdin and prints
  //     a JSON object containing `$CLAWD_PLUGIN_ID`, `$CLAWD_TOOL_NAME` and the input.
  //   - `.claude-plugin/plugin.json`: manifest declaring the `plugin_echo` tool.
  // It also writes `settings.json` into the workspace's config home. Panics on any
  // filesystem error.
  303. fn prepare_plugin_fixture(workspace: &HarnessWorkspace) {
  304. let plugin_root = workspace
  305. .root
  306. .join("external-plugins")
  307. .join("parity-plugin");
  308. let tool_dir = plugin_root.join("tools");
  309. let manifest_dir = plugin_root.join(".claude-plugin");
  310. fs::create_dir_all(&tool_dir).expect("plugin tools dir");
  311. fs::create_dir_all(&manifest_dir).expect("plugin manifest dir");
  312. let script_path = tool_dir.join("echo-json.sh");
  // The tool script: echoes its stdin back, wrapped in a JSON envelope.
  313. fs::write(
  314. &script_path,
  315. "#!/bin/sh\nINPUT=$(cat)\nprintf '{\"plugin\":\"%s\",\"tool\":\"%s\",\"input\":%s}\\n' \"$CLAWD_PLUGIN_ID\" \"$CLAWD_TOOL_NAME\" \"$INPUT\"\n",
  316. )
  317. .expect("plugin script should write");
  // Mark the script executable (rwxr-xr-x) so it can be run via its relative path.
  318. let mut permissions = fs::metadata(&script_path)
  319. .expect("plugin script metadata")
  320. .permissions();
  321. permissions.set_mode(0o755);
  322. fs::set_permissions(&script_path, permissions).expect("plugin script should be executable");
  // Plugin manifest: a single `plugin_echo` tool that requires `workspace-write`.
  323. fs::write(
  324. manifest_dir.join("plugin.json"),
  325. r#"{
  326. "name": "parity-plugin",
  327. "version": "1.0.0",
  328. "description": "mock parity plugin",
  329. "tools": [
  330. {
  331. "name": "plugin_echo",
  332. "description": "Echo JSON input",
  333. "inputSchema": {
  334. "type": "object",
  335. "properties": {
  336. "message": { "type": "string" }
  337. },
  338. "required": ["message"],
  339. "additionalProperties": false
  340. },
  341. "command": "./tools/echo-json.sh",
  342. "requiredPermission": "workspace-write"
  343. }
  344. ]
  345. }"#,
  346. )
  347. .expect("plugin manifest should write");
  // Settings: enable the plugin and list its parent directory (`external-plugins`)
  // under `plugins.externalDirectories`.
  348. fs::write(
  349. workspace.config_home.join("settings.json"),
  350. json!({
  351. "enabledPlugins": {
  352. "parity-plugin@external": true
  353. },
  354. "plugins": {
  355. "externalDirectories": [plugin_root.parent().expect("plugin parent").display().to_string()]
  356. }
  357. })
  358. .to_string(),
  359. )
  360. .expect("plugin settings should write");
  361. }
  362. fn assert_streaming_text(_: &HarnessWorkspace, run: &ScenarioRun) {
  363. assert_eq!(
  364. run.response["message"],
  365. Value::String("Mock streaming says hello from the parity harness.".to_string())
  366. );
  367. assert_eq!(run.response["iterations"], Value::from(1));
  368. assert_eq!(run.response["tool_uses"], Value::Array(Vec::new()));
  369. assert_eq!(run.response["tool_results"], Value::Array(Vec::new()));
  370. }
  371. fn assert_read_file_roundtrip(workspace: &HarnessWorkspace, run: &ScenarioRun) {
  372. assert_eq!(run.response["iterations"], Value::from(2));
  373. assert_eq!(
  374. run.response["tool_uses"][0]["name"],
  375. Value::String("read_file".to_string())
  376. );
  377. assert_eq!(
  378. run.response["tool_uses"][0]["input"],
  379. Value::String(r#"{"path":"fixture.txt"}"#.to_string())
  380. );
  381. assert!(run.response["message"]
  382. .as_str()
  383. .expect("message text")
  384. .contains("alpha parity line"));
  385. let output = run.response["tool_results"][0]["output"]
  386. .as_str()
  387. .expect("tool output");
  388. assert!(output.contains(&workspace.root.join("fixture.txt").display().to_string()));
  389. assert!(output.contains("alpha parity line"));
  390. }
  391. fn assert_grep_chunk_assembly(_: &HarnessWorkspace, run: &ScenarioRun) {
  392. assert_eq!(run.response["iterations"], Value::from(2));
  393. assert_eq!(
  394. run.response["tool_uses"][0]["name"],
  395. Value::String("grep_search".to_string())
  396. );
  397. assert_eq!(
  398. run.response["tool_uses"][0]["input"],
  399. Value::String(
  400. r#"{"pattern":"parity","path":"fixture.txt","output_mode":"count"}"#.to_string()
  401. )
  402. );
  403. assert!(run.response["message"]
  404. .as_str()
  405. .expect("message text")
  406. .contains("2 occurrences"));
  407. assert_eq!(
  408. run.response["tool_results"][0]["is_error"],
  409. Value::Bool(false)
  410. );
  411. }
  412. fn assert_write_file_allowed(workspace: &HarnessWorkspace, run: &ScenarioRun) {
  413. assert_eq!(run.response["iterations"], Value::from(2));
  414. assert_eq!(
  415. run.response["tool_uses"][0]["name"],
  416. Value::String("write_file".to_string())
  417. );
  418. assert!(run.response["message"]
  419. .as_str()
  420. .expect("message text")
  421. .contains("generated/output.txt"));
  422. let generated = workspace.root.join("generated").join("output.txt");
  423. let contents = fs::read_to_string(&generated).expect("generated file should exist");
  424. assert_eq!(contents, "created by mock service\n");
  425. assert_eq!(
  426. run.response["tool_results"][0]["is_error"],
  427. Value::Bool(false)
  428. );
  429. }
  430. fn assert_write_file_denied(workspace: &HarnessWorkspace, run: &ScenarioRun) {
  431. assert_eq!(run.response["iterations"], Value::from(2));
  432. assert_eq!(
  433. run.response["tool_uses"][0]["name"],
  434. Value::String("write_file".to_string())
  435. );
  436. let tool_output = run.response["tool_results"][0]["output"]
  437. .as_str()
  438. .expect("tool output");
  439. assert!(tool_output.contains("requires workspace-write permission"));
  440. assert_eq!(
  441. run.response["tool_results"][0]["is_error"],
  442. Value::Bool(true)
  443. );
  444. assert!(run.response["message"]
  445. .as_str()
  446. .expect("message text")
  447. .contains("denied as expected"));
  448. assert!(!workspace.root.join("generated").join("denied.txt").exists());
  449. }
  450. fn assert_multi_tool_turn_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
  451. assert_eq!(run.response["iterations"], Value::from(2));
  452. let tool_uses = run.response["tool_uses"]
  453. .as_array()
  454. .expect("tool uses array");
  455. assert_eq!(
  456. tool_uses.len(),
  457. 2,
  458. "expected two tool uses in a single turn"
  459. );
  460. assert_eq!(tool_uses[0]["name"], Value::String("read_file".to_string()));
  461. assert_eq!(
  462. tool_uses[1]["name"],
  463. Value::String("grep_search".to_string())
  464. );
  465. let tool_results = run.response["tool_results"]
  466. .as_array()
  467. .expect("tool results array");
  468. assert_eq!(
  469. tool_results.len(),
  470. 2,
  471. "expected two tool results in a single turn"
  472. );
  473. assert!(run.response["message"]
  474. .as_str()
  475. .expect("message text")
  476. .contains("alpha parity line"));
  477. assert!(run.response["message"]
  478. .as_str()
  479. .expect("message text")
  480. .contains("2 occurrences"));
  481. }
  482. fn assert_bash_stdout_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
  483. assert_eq!(run.response["iterations"], Value::from(2));
  484. assert_eq!(
  485. run.response["tool_uses"][0]["name"],
  486. Value::String("bash".to_string())
  487. );
  488. let tool_output = run.response["tool_results"][0]["output"]
  489. .as_str()
  490. .expect("tool output");
  491. let parsed: Value = serde_json::from_str(tool_output).expect("bash output json");
  492. assert_eq!(
  493. parsed["stdout"],
  494. Value::String("alpha from bash".to_string())
  495. );
  496. assert_eq!(
  497. run.response["tool_results"][0]["is_error"],
  498. Value::Bool(false)
  499. );
  500. assert!(run.response["message"]
  501. .as_str()
  502. .expect("message text")
  503. .contains("alpha from bash"));
  504. }
  505. fn assert_bash_permission_prompt_approved(_: &HarnessWorkspace, run: &ScenarioRun) {
  506. assert!(run.stdout.contains("Permission approval required"));
  507. assert!(run.stdout.contains("Approve this tool call? [y/N]:"));
  508. assert_eq!(run.response["iterations"], Value::from(2));
  509. assert_eq!(
  510. run.response["tool_results"][0]["is_error"],
  511. Value::Bool(false)
  512. );
  513. let tool_output = run.response["tool_results"][0]["output"]
  514. .as_str()
  515. .expect("tool output");
  516. let parsed: Value = serde_json::from_str(tool_output).expect("bash output json");
  517. assert_eq!(
  518. parsed["stdout"],
  519. Value::String("approved via prompt".to_string())
  520. );
  521. assert!(run.response["message"]
  522. .as_str()
  523. .expect("message text")
  524. .contains("approved and executed"));
  525. }
  526. fn assert_bash_permission_prompt_denied(_: &HarnessWorkspace, run: &ScenarioRun) {
  527. assert!(run.stdout.contains("Permission approval required"));
  528. assert!(run.stdout.contains("Approve this tool call? [y/N]:"));
  529. assert_eq!(run.response["iterations"], Value::from(2));
  530. let tool_output = run.response["tool_results"][0]["output"]
  531. .as_str()
  532. .expect("tool output");
  533. assert!(tool_output.contains("denied by user approval prompt"));
  534. assert_eq!(
  535. run.response["tool_results"][0]["is_error"],
  536. Value::Bool(true)
  537. );
  538. assert!(run.response["message"]
  539. .as_str()
  540. .expect("message text")
  541. .contains("denied as expected"));
  542. }
  543. fn assert_plugin_tool_roundtrip(_: &HarnessWorkspace, run: &ScenarioRun) {
  544. assert_eq!(run.response["iterations"], Value::from(2));
  545. assert_eq!(
  546. run.response["tool_uses"][0]["name"],
  547. Value::String("plugin_echo".to_string())
  548. );
  549. let tool_output = run.response["tool_results"][0]["output"]
  550. .as_str()
  551. .expect("tool output");
  552. let parsed: Value = serde_json::from_str(tool_output).expect("plugin output json");
  553. assert_eq!(
  554. parsed["plugin"],
  555. Value::String("parity-plugin@external".to_string())
  556. );
  557. assert_eq!(parsed["tool"], Value::String("plugin_echo".to_string()));
  558. assert_eq!(
  559. parsed["input"]["message"],
  560. Value::String("hello from plugin parity".to_string())
  561. );
  562. assert!(run.response["message"]
  563. .as_str()
  564. .expect("message text")
  565. .contains("hello from plugin parity"));
  566. }
  567. fn parse_json_output(stdout: &str) -> Value {
  568. if let Some(index) = stdout.rfind("{\"auto_compaction\"") {
  569. return serde_json::from_str(&stdout[index..]).unwrap_or_else(|error| {
  570. panic!("failed to parse JSON response from stdout: {error}\n{stdout}")
  571. });
  572. }
  573. stdout
  574. .lines()
  575. .rev()
  576. .find_map(|line| {
  577. let trimmed = line.trim();
  578. if trimmed.starts_with('{') && trimmed.ends_with('}') {
  579. serde_json::from_str(trimmed).ok()
  580. } else {
  581. None
  582. }
  583. })
  584. .unwrap_or_else(|| panic!("no JSON response line found in stdout:\n{stdout}"))
  585. }
  586. fn build_scenario_report(
  587. name: &str,
  588. manifest_entry: &ScenarioManifestEntry,
  589. response: &Value,
  590. ) -> ScenarioReport {
  591. ScenarioReport {
  592. name: name.to_string(),
  593. category: manifest_entry.category.clone(),
  594. description: manifest_entry.description.clone(),
  595. parity_refs: manifest_entry.parity_refs.clone(),
  596. iterations: response["iterations"]
  597. .as_u64()
  598. .expect("iterations should exist"),
  599. request_count: 0,
  600. tool_uses: response["tool_uses"]
  601. .as_array()
  602. .expect("tool uses array")
  603. .iter()
  604. .filter_map(|value| value["name"].as_str().map(ToOwned::to_owned))
  605. .collect(),
  606. tool_error_count: response["tool_results"]
  607. .as_array()
  608. .expect("tool results array")
  609. .iter()
  610. .filter(|value| value["is_error"].as_bool().unwrap_or(false))
  611. .count(),
  612. final_message: response["message"]
  613. .as_str()
  614. .expect("message text")
  615. .to_string(),
  616. }
  617. }
  618. fn maybe_write_report(reports: &[ScenarioReport]) {
  619. let Some(path) = std::env::var_os("MOCK_PARITY_REPORT_PATH") else {
  620. return;
  621. };
  622. let payload = json!({
  623. "scenario_count": reports.len(),
  624. "request_count": reports.iter().map(|report| report.request_count).sum::<usize>(),
  625. "scenarios": reports.iter().map(scenario_report_json).collect::<Vec<_>>(),
  626. });
  627. fs::write(
  628. path,
  629. serde_json::to_vec_pretty(&payload).expect("report json should serialize"),
  630. )
  631. .expect("report should write");
  632. }
  633. fn load_scenario_manifest() -> Vec<ScenarioManifestEntry> {
  634. let manifest_path =
  635. Path::new(env!("CARGO_MANIFEST_DIR")).join("../../mock_parity_scenarios.json");
  636. let manifest = fs::read_to_string(&manifest_path).expect("scenario manifest should exist");
  637. serde_json::from_str::<Vec<Value>>(&manifest)
  638. .expect("scenario manifest should parse")
  639. .into_iter()
  640. .map(|entry| ScenarioManifestEntry {
  641. name: entry["name"]
  642. .as_str()
  643. .expect("scenario name should be a string")
  644. .to_string(),
  645. category: entry["category"]
  646. .as_str()
  647. .expect("scenario category should be a string")
  648. .to_string(),
  649. description: entry["description"]
  650. .as_str()
  651. .expect("scenario description should be a string")
  652. .to_string(),
  653. parity_refs: entry["parity_refs"]
  654. .as_array()
  655. .expect("parity refs should be an array")
  656. .iter()
  657. .map(|value| {
  658. value
  659. .as_str()
  660. .expect("parity ref should be a string")
  661. .to_string()
  662. })
  663. .collect(),
  664. })
  665. .collect()
  666. }
  667. fn scenario_report_json(report: &ScenarioReport) -> Value {
  668. json!({
  669. "name": report.name,
  670. "category": report.category,
  671. "description": report.description,
  672. "parity_refs": report.parity_refs,
  673. "iterations": report.iterations,
  674. "request_count": report.request_count,
  675. "tool_uses": report.tool_uses,
  676. "tool_error_count": report.tool_error_count,
  677. "final_message": report.final_message,
  678. })
  679. }
  680. fn assert_success(output: &Output) {
  681. assert!(
  682. output.status.success(),
  683. "stdout:\n{}\n\nstderr:\n{}",
  684. String::from_utf8_lossy(&output.stdout),
  685. String::from_utf8_lossy(&output.stderr)
  686. );
  687. }
  688. fn unique_temp_dir(label: &str) -> PathBuf {
  689. let millis = SystemTime::now()
  690. .duration_since(UNIX_EPOCH)
  691. .expect("clock should be after epoch")
  692. .as_millis();
  693. let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
  694. std::env::temp_dir().join(format!(
  695. "claw-mock-parity-{label}-{}-{millis}-{counter}",
  696. std::process::id()
  697. ))
  698. }