mock_parity_harness.rs 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257
  1. use std::fs;
  2. use std::path::{Path, PathBuf};
  3. use std::process::{Command, Output};
  4. use std::sync::atomic::{AtomicU64, Ordering};
  5. use std::time::{SystemTime, UNIX_EPOCH};
  6. use mock_anthropic_service::{MockAnthropicService, SCENARIO_PREFIX};
  7. use serde_json::Value;
  // Process-wide counter; `unique_temp_dir` bumps it with `fetch_add` on every call and
  // embeds the previous value in the directory name, so successive calls in one
  // process produce distinct names even within the same millisecond.
  8. static TEMP_COUNTER: AtomicU64 = AtomicU64::new(0);
  /// Runs every scripted parity scenario through the CLI against a freshly spawned
  /// mock service, then checks the requests the mock captured.
  ///
  /// Each scenario gets its own temporary workspace: the scenario's `seed` hook
  /// populates it, `run_case` produces the parsed JSON response, the scenario's
  /// `assert` hook validates the outcome, and the workspace is removed afterwards.
  #[test]
  fn clean_env_cli_reaches_mock_anthropic_service_across_scripted_parity_scenarios() {
      let rt = tokio::runtime::Runtime::new().expect("tokio runtime should build");
      let mock = rt
          .block_on(MockAnthropicService::spawn())
          .expect("mock service should start");
      let endpoint = mock.base_url();

      // Order matters: the captured-request assertion at the bottom expects the
      // scenarios to have been executed in exactly this sequence.
      let cases = [
          ScenarioCase {
              name: "streaming_text",
              permission_mode: "read-only",
              allowed_tools: None,
              seed: seed_noop,
              assert: assert_streaming_text,
          },
          ScenarioCase {
              name: "read_file_roundtrip",
              permission_mode: "read-only",
              allowed_tools: Some("read_file"),
              seed: seed_read_fixture,
              assert: assert_read_file_roundtrip,
          },
          ScenarioCase {
              name: "grep_chunk_assembly",
              permission_mode: "read-only",
              allowed_tools: Some("grep_search"),
              seed: seed_grep_fixture,
              assert: assert_grep_chunk_assembly,
          },
          ScenarioCase {
              name: "write_file_allowed",
              permission_mode: "workspace-write",
              allowed_tools: Some("write_file"),
              seed: seed_noop,
              assert: assert_write_file_allowed,
          },
          ScenarioCase {
              name: "write_file_denied",
              permission_mode: "read-only",
              allowed_tools: Some("write_file"),
              seed: seed_noop,
              assert: assert_write_file_denied,
          },
      ];

      for &case in &cases {
          let sandbox = unique_temp_dir(case.name);
          fs::create_dir_all(&sandbox).expect("workspace should exist");

          let seed_workspace = case.seed;
          seed_workspace(&sandbox);

          let reply = run_case(case, &sandbox, &endpoint);

          let check_outcome = case.assert;
          check_outcome(&sandbox, &reply);

          fs::remove_dir_all(&sandbox).expect("workspace cleanup should succeed");
      }

      let requests = rt.block_on(mock.captured_requests());
      assert_eq!(
          requests.len(),
          9,
          "five scenarios should produce nine requests"
      );
      assert!(requests.iter().all(|req| req.path == "/v1/messages"));
      assert!(requests.iter().all(|req| req.stream));

      // The text-only scenario accounts for one request; every tool scenario for two.
      let observed: Vec<_> = requests.iter().map(|req| req.scenario.as_str()).collect();
      assert_eq!(
          observed,
          vec![
              "streaming_text",
              "read_file_roundtrip",
              "read_file_roundtrip",
              "grep_chunk_assembly",
              "grep_chunk_assembly",
              "write_file_allowed",
              "write_file_allowed",
              "write_file_denied",
              "write_file_denied",
          ]
      );
  }
  90. #[derive(Clone, Copy)]
  91. struct ScenarioCase {
  92. name: &'static str,
  93. permission_mode: &'static str,
  94. allowed_tools: Option<&'static str>,
  95. seed: fn(&Path),
  96. assert: fn(&Path, &Value),
  97. }
  98. fn run_case(case: ScenarioCase, workspace: &Path, base_url: &str) -> Value {
  99. let config_home = workspace.join("config-home");
  100. let home = workspace.join("home");
  101. fs::create_dir_all(config_home.join(".claw")).expect("config home should exist");
  102. fs::create_dir_all(&home).expect("home should exist");
  103. let mut command = Command::new(env!("CARGO_BIN_EXE_claw"));
  104. command
  105. .current_dir(workspace)
  106. .env_clear()
  107. .env("ANTHROPIC_API_KEY", "test-parity-key")
  108. .env("ANTHROPIC_BASE_URL", base_url)
  109. .env("CLAW_CONFIG_HOME", &config_home)
  110. .env("HOME", &home)
  111. .env("NO_COLOR", "1")
  112. .args([
  113. "--model",
  114. "sonnet",
  115. "--permission-mode",
  116. case.permission_mode,
  117. "--output-format=json",
  118. ]);
  119. if let Some(allowed_tools) = case.allowed_tools {
  120. command.args(["--allowedTools", allowed_tools]);
  121. }
  122. let prompt = format!("{SCENARIO_PREFIX}{}", case.name);
  123. let output = command.arg(prompt).output().expect("claw should launch");
  124. assert_success(&output);
  125. serde_json::from_slice(&output.stdout).expect("prompt output should be valid json")
  126. }
  /// Seed hook for scenarios that need no pre-existing files; leaves the workspace untouched.
  fn seed_noop(_workspace: &Path) {}
  /// Seed hook that writes a one-line `fixture.txt` into the workspace.
  ///
  /// # Panics
  ///
  /// Panics if the file cannot be written.
  fn seed_read_fixture(workspace: &Path) {
      let fixture = workspace.join("fixture.txt");
      fs::write(fixture, "alpha parity line\n").expect("fixture should write");
  }
  /// Seed hook that writes a three-line `fixture.txt` into the workspace; two of
  /// the three lines contain the word "parity".
  ///
  /// # Panics
  ///
  /// Panics if the file cannot be written.
  fn seed_grep_fixture(workspace: &Path) {
      let fixture = workspace.join("fixture.txt");
      let body = "alpha parity line\nbeta line\ngamma parity line\n";
      fs::write(fixture, body).expect("grep fixture should write");
  }
  138. fn assert_streaming_text(_: &Path, response: &Value) {
  139. assert_eq!(
  140. response["message"],
  141. Value::String("Mock streaming says hello from the parity harness.".to_string())
  142. );
  143. assert_eq!(response["iterations"], Value::from(1));
  144. assert_eq!(response["tool_uses"], Value::Array(Vec::new()));
  145. assert_eq!(response["tool_results"], Value::Array(Vec::new()));
  146. }
  147. fn assert_read_file_roundtrip(workspace: &Path, response: &Value) {
  148. assert_eq!(response["iterations"], Value::from(2));
  149. assert_eq!(
  150. response["tool_uses"][0]["name"],
  151. Value::String("read_file".to_string())
  152. );
  153. assert_eq!(
  154. response["tool_uses"][0]["input"],
  155. Value::String(r#"{"path":"fixture.txt"}"#.to_string())
  156. );
  157. assert!(response["message"]
  158. .as_str()
  159. .expect("message text")
  160. .contains("alpha parity line"));
  161. let output = response["tool_results"][0]["output"]
  162. .as_str()
  163. .expect("tool output");
  164. assert!(output.contains(&workspace.join("fixture.txt").display().to_string()));
  165. assert!(output.contains("alpha parity line"));
  166. }
  167. fn assert_grep_chunk_assembly(_: &Path, response: &Value) {
  168. assert_eq!(response["iterations"], Value::from(2));
  169. assert_eq!(
  170. response["tool_uses"][0]["name"],
  171. Value::String("grep_search".to_string())
  172. );
  173. assert_eq!(
  174. response["tool_uses"][0]["input"],
  175. Value::String(
  176. r#"{"pattern":"parity","path":"fixture.txt","output_mode":"count"}"#.to_string()
  177. )
  178. );
  179. assert!(response["message"]
  180. .as_str()
  181. .expect("message text")
  182. .contains("2 occurrences"));
  183. assert_eq!(response["tool_results"][0]["is_error"], Value::Bool(false));
  184. }
  185. fn assert_write_file_allowed(workspace: &Path, response: &Value) {
  186. assert_eq!(response["iterations"], Value::from(2));
  187. assert_eq!(
  188. response["tool_uses"][0]["name"],
  189. Value::String("write_file".to_string())
  190. );
  191. assert!(response["message"]
  192. .as_str()
  193. .expect("message text")
  194. .contains("generated/output.txt"));
  195. let generated = workspace.join("generated").join("output.txt");
  196. let contents = fs::read_to_string(&generated).expect("generated file should exist");
  197. assert_eq!(contents, "created by mock service\n");
  198. assert_eq!(response["tool_results"][0]["is_error"], Value::Bool(false));
  199. }
  200. fn assert_write_file_denied(workspace: &Path, response: &Value) {
  201. assert_eq!(response["iterations"], Value::from(2));
  202. assert_eq!(
  203. response["tool_uses"][0]["name"],
  204. Value::String("write_file".to_string())
  205. );
  206. let tool_output = response["tool_results"][0]["output"]
  207. .as_str()
  208. .expect("tool output");
  209. assert!(tool_output.contains("requires workspace-write permission"));
  210. assert_eq!(response["tool_results"][0]["is_error"], Value::Bool(true));
  211. assert!(response["message"]
  212. .as_str()
  213. .expect("message text")
  214. .contains("denied as expected"));
  215. assert!(!workspace.join("generated").join("denied.txt").exists());
  216. }
  /// Panics unless the process exited successfully, printing the captured stdout
  /// and stderr (lossily decoded as UTF-8) in the panic message.
  fn assert_success(output: &Output) {
      if output.status.success() {
          return;
      }

      let stdout = String::from_utf8_lossy(&output.stdout);
      let stderr = String::from_utf8_lossy(&output.stderr);
      panic!("stdout:\n{}\n\nstderr:\n{}", stdout, stderr);
  }
  225. fn unique_temp_dir(label: &str) -> PathBuf {
  226. let millis = SystemTime::now()
  227. .duration_since(UNIX_EPOCH)
  228. .expect("clock should be after epoch")
  229. .as_millis();
  230. let counter = TEMP_COUNTER.fetch_add(1, Ordering::Relaxed);
  231. std::env::temp_dir().join(format!(
  232. "claw-mock-parity-{label}-{}-{millis}-{counter}",
  233. std::process::id()
  234. ))
  235. }