file_ops.rs 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762
  1. use std::cmp::Reverse;
  2. use std::fs;
  3. use std::io;
  4. use std::path::{Path, PathBuf};
  5. use std::time::Instant;
  6. use glob::Pattern;
  7. use regex::RegexBuilder;
  8. use serde::{Deserialize, Serialize};
  9. use walkdir::WalkDir;
  10. /// Maximum file size that can be read (10 MB).
  11. const MAX_READ_SIZE: u64 = 10 * 1024 * 1024;
  12. /// Maximum file size that can be written (10 MB).
  13. const MAX_WRITE_SIZE: usize = 10 * 1024 * 1024;
  14. /// Check whether a file appears to contain binary content by examining
  15. /// the first chunk for NUL bytes.
  16. fn is_binary_file(path: &Path) -> io::Result<bool> {
  17. use std::io::Read;
  18. let mut file = fs::File::open(path)?;
  19. let mut buffer = [0u8; 8192];
  20. let bytes_read = file.read(&mut buffer)?;
  21. Ok(buffer[..bytes_read].contains(&0))
  22. }
  23. /// Validate that a resolved path stays within the given workspace root.
  24. /// Returns the canonical path on success, or an error if the path escapes
  25. /// the workspace boundary (e.g. via `../` traversal or symlink).
  26. #[allow(dead_code)]
  27. fn validate_workspace_boundary(resolved: &Path, workspace_root: &Path) -> io::Result<()> {
  28. if !resolved.starts_with(workspace_root) {
  29. return Err(io::Error::new(
  30. io::ErrorKind::PermissionDenied,
  31. format!(
  32. "path {} escapes workspace boundary {}",
  33. resolved.display(),
  34. workspace_root.display()
  35. ),
  36. ));
  37. }
  38. Ok(())
  39. }
  40. /// Text payload returned by file-reading operations.
  41. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  42. pub struct TextFilePayload {
  43. #[serde(rename = "filePath")]
  44. pub file_path: String,
  45. pub content: String,
  46. #[serde(rename = "numLines")]
  47. pub num_lines: usize,
  48. #[serde(rename = "startLine")]
  49. pub start_line: usize,
  50. #[serde(rename = "totalLines")]
  51. pub total_lines: usize,
  52. }
  53. /// Output envelope for the `read_file` tool.
  54. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  55. pub struct ReadFileOutput {
  56. #[serde(rename = "type")]
  57. pub kind: String,
  58. pub file: TextFilePayload,
  59. }
  60. /// Structured patch hunk emitted by write and edit operations.
  61. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  62. pub struct StructuredPatchHunk {
  63. #[serde(rename = "oldStart")]
  64. pub old_start: usize,
  65. #[serde(rename = "oldLines")]
  66. pub old_lines: usize,
  67. #[serde(rename = "newStart")]
  68. pub new_start: usize,
  69. #[serde(rename = "newLines")]
  70. pub new_lines: usize,
  71. pub lines: Vec<String>,
  72. }
  73. /// Output envelope for full-file write operations.
  74. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  75. pub struct WriteFileOutput {
  76. #[serde(rename = "type")]
  77. pub kind: String,
  78. #[serde(rename = "filePath")]
  79. pub file_path: String,
  80. pub content: String,
  81. #[serde(rename = "structuredPatch")]
  82. pub structured_patch: Vec<StructuredPatchHunk>,
  83. #[serde(rename = "originalFile")]
  84. pub original_file: Option<String>,
  85. #[serde(rename = "gitDiff")]
  86. pub git_diff: Option<serde_json::Value>,
  87. }
  88. /// Output envelope for targeted string-replacement edits.
  89. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  90. pub struct EditFileOutput {
  91. #[serde(rename = "filePath")]
  92. pub file_path: String,
  93. #[serde(rename = "oldString")]
  94. pub old_string: String,
  95. #[serde(rename = "newString")]
  96. pub new_string: String,
  97. #[serde(rename = "originalFile")]
  98. pub original_file: String,
  99. #[serde(rename = "structuredPatch")]
  100. pub structured_patch: Vec<StructuredPatchHunk>,
  101. #[serde(rename = "userModified")]
  102. pub user_modified: bool,
  103. #[serde(rename = "replaceAll")]
  104. pub replace_all: bool,
  105. #[serde(rename = "gitDiff")]
  106. pub git_diff: Option<serde_json::Value>,
  107. }
  108. /// Result of a glob-based filename search.
  109. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
  110. pub struct GlobSearchOutput {
  111. #[serde(rename = "durationMs")]
  112. pub duration_ms: u128,
  113. #[serde(rename = "numFiles")]
  114. pub num_files: usize,
  115. pub filenames: Vec<String>,
  116. pub truncated: bool,
  117. }
  118. /// Parameters accepted by the grep-style search tool.
  119. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  120. pub struct GrepSearchInput {
  121. pub pattern: String,
  122. pub path: Option<String>,
  123. pub glob: Option<String>,
  124. #[serde(rename = "output_mode")]
  125. pub output_mode: Option<String>,
  126. #[serde(rename = "-B")]
  127. pub before: Option<usize>,
  128. #[serde(rename = "-A")]
  129. pub after: Option<usize>,
  130. #[serde(rename = "-C")]
  131. pub context_short: Option<usize>,
  132. pub context: Option<usize>,
  133. #[serde(rename = "-n")]
  134. pub line_numbers: Option<bool>,
  135. #[serde(rename = "-i")]
  136. pub case_insensitive: Option<bool>,
  137. #[serde(rename = "type")]
  138. pub file_type: Option<String>,
  139. pub head_limit: Option<usize>,
  140. pub offset: Option<usize>,
  141. pub multiline: Option<bool>,
  142. }
  143. /// Result payload returned by the grep-style search tool.
  144. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  145. pub struct GrepSearchOutput {
  146. pub mode: Option<String>,
  147. #[serde(rename = "numFiles")]
  148. pub num_files: usize,
  149. pub filenames: Vec<String>,
  150. pub content: Option<String>,
  151. #[serde(rename = "numLines")]
  152. pub num_lines: Option<usize>,
  153. #[serde(rename = "numMatches")]
  154. pub num_matches: Option<usize>,
  155. #[serde(rename = "appliedLimit")]
  156. pub applied_limit: Option<usize>,
  157. #[serde(rename = "appliedOffset")]
  158. pub applied_offset: Option<usize>,
  159. }
  160. /// Reads a text file and returns a line-windowed payload.
  161. pub fn read_file(
  162. path: &str,
  163. offset: Option<usize>,
  164. limit: Option<usize>,
  165. ) -> io::Result<ReadFileOutput> {
  166. let absolute_path = normalize_path(path)?;
  167. // Check file size before reading
  168. let metadata = fs::metadata(&absolute_path)?;
  169. if metadata.len() > MAX_READ_SIZE {
  170. return Err(io::Error::new(
  171. io::ErrorKind::InvalidData,
  172. format!(
  173. "file is too large ({} bytes, max {} bytes)",
  174. metadata.len(),
  175. MAX_READ_SIZE
  176. ),
  177. ));
  178. }
  179. // Detect binary files
  180. if is_binary_file(&absolute_path)? {
  181. return Err(io::Error::new(
  182. io::ErrorKind::InvalidData,
  183. "file appears to be binary",
  184. ));
  185. }
  186. let content = fs::read_to_string(&absolute_path)?;
  187. let lines: Vec<&str> = content.lines().collect();
  188. let start_index = offset.unwrap_or(0).min(lines.len());
  189. let end_index = limit.map_or(lines.len(), |limit| {
  190. start_index.saturating_add(limit).min(lines.len())
  191. });
  192. let selected = lines[start_index..end_index].join("\n");
  193. Ok(ReadFileOutput {
  194. kind: String::from("text"),
  195. file: TextFilePayload {
  196. file_path: absolute_path.to_string_lossy().into_owned(),
  197. content: selected,
  198. num_lines: end_index.saturating_sub(start_index),
  199. start_line: start_index.saturating_add(1),
  200. total_lines: lines.len(),
  201. },
  202. })
  203. }
  204. /// Replaces a file's contents and returns patch metadata.
  205. pub fn write_file(path: &str, content: &str) -> io::Result<WriteFileOutput> {
  206. if content.len() > MAX_WRITE_SIZE {
  207. return Err(io::Error::new(
  208. io::ErrorKind::InvalidData,
  209. format!(
  210. "content is too large ({} bytes, max {} bytes)",
  211. content.len(),
  212. MAX_WRITE_SIZE
  213. ),
  214. ));
  215. }
  216. let absolute_path = normalize_path_allow_missing(path)?;
  217. let original_file = fs::read_to_string(&absolute_path).ok();
  218. if let Some(parent) = absolute_path.parent() {
  219. fs::create_dir_all(parent)?;
  220. }
  221. fs::write(&absolute_path, content)?;
  222. Ok(WriteFileOutput {
  223. kind: if original_file.is_some() {
  224. String::from("update")
  225. } else {
  226. String::from("create")
  227. },
  228. file_path: absolute_path.to_string_lossy().into_owned(),
  229. content: content.to_owned(),
  230. structured_patch: make_patch(original_file.as_deref().unwrap_or(""), content),
  231. original_file,
  232. git_diff: None,
  233. })
  234. }
  235. /// Performs an in-file string replacement and returns patch metadata.
  236. pub fn edit_file(
  237. path: &str,
  238. old_string: &str,
  239. new_string: &str,
  240. replace_all: bool,
  241. ) -> io::Result<EditFileOutput> {
  242. let absolute_path = normalize_path(path)?;
  243. let original_file = fs::read_to_string(&absolute_path)?;
  244. if old_string == new_string {
  245. return Err(io::Error::new(
  246. io::ErrorKind::InvalidInput,
  247. "old_string and new_string must differ",
  248. ));
  249. }
  250. if !original_file.contains(old_string) {
  251. return Err(io::Error::new(
  252. io::ErrorKind::NotFound,
  253. "old_string not found in file",
  254. ));
  255. }
  256. let updated = if replace_all {
  257. original_file.replace(old_string, new_string)
  258. } else {
  259. original_file.replacen(old_string, new_string, 1)
  260. };
  261. fs::write(&absolute_path, &updated)?;
  262. Ok(EditFileOutput {
  263. file_path: absolute_path.to_string_lossy().into_owned(),
  264. old_string: old_string.to_owned(),
  265. new_string: new_string.to_owned(),
  266. original_file: original_file.clone(),
  267. structured_patch: make_patch(&original_file, &updated),
  268. user_modified: false,
  269. replace_all,
  270. git_diff: None,
  271. })
  272. }
  273. /// Expands a glob pattern and returns matching filenames.
  274. pub fn glob_search(pattern: &str, path: Option<&str>) -> io::Result<GlobSearchOutput> {
  275. let started = Instant::now();
  276. let base_dir = path
  277. .map(normalize_path)
  278. .transpose()?
  279. .unwrap_or(std::env::current_dir()?);
  280. let search_pattern = if Path::new(pattern).is_absolute() {
  281. pattern.to_owned()
  282. } else {
  283. base_dir.join(pattern).to_string_lossy().into_owned()
  284. };
  285. let mut matches = Vec::new();
  286. let entries = glob::glob(&search_pattern)
  287. .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?;
  288. for entry in entries.flatten() {
  289. if entry.is_file() {
  290. matches.push(entry);
  291. }
  292. }
  293. matches.sort_by_key(|path| {
  294. fs::metadata(path)
  295. .and_then(|metadata| metadata.modified())
  296. .ok()
  297. .map(Reverse)
  298. });
  299. let truncated = matches.len() > 100;
  300. let filenames = matches
  301. .into_iter()
  302. .take(100)
  303. .map(|path| path.to_string_lossy().into_owned())
  304. .collect::<Vec<_>>();
  305. Ok(GlobSearchOutput {
  306. duration_ms: started.elapsed().as_millis(),
  307. num_files: filenames.len(),
  308. filenames,
  309. truncated,
  310. })
  311. }
  312. /// Runs a regex search over workspace files with optional context lines.
  313. pub fn grep_search(input: &GrepSearchInput) -> io::Result<GrepSearchOutput> {
  314. let base_path = input
  315. .path
  316. .as_deref()
  317. .map(normalize_path)
  318. .transpose()?
  319. .unwrap_or(std::env::current_dir()?);
  320. let regex = RegexBuilder::new(&input.pattern)
  321. .case_insensitive(input.case_insensitive.unwrap_or(false))
  322. .dot_matches_new_line(input.multiline.unwrap_or(false))
  323. .build()
  324. .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?;
  325. let glob_filter = input
  326. .glob
  327. .as_deref()
  328. .map(Pattern::new)
  329. .transpose()
  330. .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?;
  331. let file_type = input.file_type.as_deref();
  332. let output_mode = input
  333. .output_mode
  334. .clone()
  335. .unwrap_or_else(|| String::from("files_with_matches"));
  336. let context = input.context.or(input.context_short).unwrap_or(0);
  337. let mut filenames = Vec::new();
  338. let mut content_lines = Vec::new();
  339. let mut total_matches = 0usize;
  340. for file_path in collect_search_files(&base_path)? {
  341. if !matches_optional_filters(&file_path, glob_filter.as_ref(), file_type) {
  342. continue;
  343. }
  344. let Ok(file_contents) = fs::read_to_string(&file_path) else {
  345. continue;
  346. };
  347. if output_mode == "count" {
  348. let count = regex.find_iter(&file_contents).count();
  349. if count > 0 {
  350. filenames.push(file_path.to_string_lossy().into_owned());
  351. total_matches += count;
  352. }
  353. continue;
  354. }
  355. let lines: Vec<&str> = file_contents.lines().collect();
  356. let mut matched_lines = Vec::new();
  357. for (index, line) in lines.iter().enumerate() {
  358. if regex.is_match(line) {
  359. total_matches += 1;
  360. matched_lines.push(index);
  361. }
  362. }
  363. if matched_lines.is_empty() {
  364. continue;
  365. }
  366. filenames.push(file_path.to_string_lossy().into_owned());
  367. if output_mode == "content" {
  368. for index in matched_lines {
  369. let start = index.saturating_sub(input.before.unwrap_or(context));
  370. let end = (index + input.after.unwrap_or(context) + 1).min(lines.len());
  371. for (current, line) in lines.iter().enumerate().take(end).skip(start) {
  372. let prefix = if input.line_numbers.unwrap_or(true) {
  373. format!("{}:{}:", file_path.to_string_lossy(), current + 1)
  374. } else {
  375. format!("{}:", file_path.to_string_lossy())
  376. };
  377. content_lines.push(format!("{prefix}{line}"));
  378. }
  379. }
  380. }
  381. }
  382. let (filenames, applied_limit, applied_offset) =
  383. apply_limit(filenames, input.head_limit, input.offset);
  384. let content_output = if output_mode == "content" {
  385. let (lines, limit, offset) = apply_limit(content_lines, input.head_limit, input.offset);
  386. return Ok(GrepSearchOutput {
  387. mode: Some(output_mode),
  388. num_files: filenames.len(),
  389. filenames,
  390. num_lines: Some(lines.len()),
  391. content: Some(lines.join("\n")),
  392. num_matches: None,
  393. applied_limit: limit,
  394. applied_offset: offset,
  395. });
  396. } else {
  397. None
  398. };
  399. Ok(GrepSearchOutput {
  400. mode: Some(output_mode.clone()),
  401. num_files: filenames.len(),
  402. filenames,
  403. content: content_output,
  404. num_lines: None,
  405. num_matches: (output_mode == "count").then_some(total_matches),
  406. applied_limit,
  407. applied_offset,
  408. })
  409. }
  410. fn collect_search_files(base_path: &Path) -> io::Result<Vec<PathBuf>> {
  411. if base_path.is_file() {
  412. return Ok(vec![base_path.to_path_buf()]);
  413. }
  414. let mut files = Vec::new();
  415. for entry in WalkDir::new(base_path) {
  416. let entry = entry.map_err(|error| io::Error::other(error.to_string()))?;
  417. if entry.file_type().is_file() {
  418. files.push(entry.path().to_path_buf());
  419. }
  420. }
  421. Ok(files)
  422. }
  423. fn matches_optional_filters(
  424. path: &Path,
  425. glob_filter: Option<&Pattern>,
  426. file_type: Option<&str>,
  427. ) -> bool {
  428. if let Some(glob_filter) = glob_filter {
  429. let path_string = path.to_string_lossy();
  430. if !glob_filter.matches(&path_string) && !glob_filter.matches_path(path) {
  431. return false;
  432. }
  433. }
  434. if let Some(file_type) = file_type {
  435. let extension = path.extension().and_then(|extension| extension.to_str());
  436. if extension != Some(file_type) {
  437. return false;
  438. }
  439. }
  440. true
  441. }
  442. fn apply_limit<T>(
  443. items: Vec<T>,
  444. limit: Option<usize>,
  445. offset: Option<usize>,
  446. ) -> (Vec<T>, Option<usize>, Option<usize>) {
  447. let offset_value = offset.unwrap_or(0);
  448. let mut items = items.into_iter().skip(offset_value).collect::<Vec<_>>();
  449. let explicit_limit = limit.unwrap_or(250);
  450. if explicit_limit == 0 {
  451. return (items, None, (offset_value > 0).then_some(offset_value));
  452. }
  453. let truncated = items.len() > explicit_limit;
  454. items.truncate(explicit_limit);
  455. (
  456. items,
  457. truncated.then_some(explicit_limit),
  458. (offset_value > 0).then_some(offset_value),
  459. )
  460. }
  461. fn make_patch(original: &str, updated: &str) -> Vec<StructuredPatchHunk> {
  462. let mut lines = Vec::new();
  463. for line in original.lines() {
  464. lines.push(format!("-{line}"));
  465. }
  466. for line in updated.lines() {
  467. lines.push(format!("+{line}"));
  468. }
  469. vec![StructuredPatchHunk {
  470. old_start: 1,
  471. old_lines: original.lines().count(),
  472. new_start: 1,
  473. new_lines: updated.lines().count(),
  474. lines,
  475. }]
  476. }
  477. fn normalize_path(path: &str) -> io::Result<PathBuf> {
  478. let candidate = if Path::new(path).is_absolute() {
  479. PathBuf::from(path)
  480. } else {
  481. std::env::current_dir()?.join(path)
  482. };
  483. candidate.canonicalize()
  484. }
  485. fn normalize_path_allow_missing(path: &str) -> io::Result<PathBuf> {
  486. let candidate = if Path::new(path).is_absolute() {
  487. PathBuf::from(path)
  488. } else {
  489. std::env::current_dir()?.join(path)
  490. };
  491. if let Ok(canonical) = candidate.canonicalize() {
  492. return Ok(canonical);
  493. }
  494. if let Some(parent) = candidate.parent() {
  495. let canonical_parent = parent
  496. .canonicalize()
  497. .unwrap_or_else(|_| parent.to_path_buf());
  498. if let Some(name) = candidate.file_name() {
  499. return Ok(canonical_parent.join(name));
  500. }
  501. }
  502. Ok(candidate)
  503. }
  504. /// Read a file with workspace boundary enforcement.
  505. #[allow(dead_code)]
  506. pub fn read_file_in_workspace(
  507. path: &str,
  508. offset: Option<usize>,
  509. limit: Option<usize>,
  510. workspace_root: &Path,
  511. ) -> io::Result<ReadFileOutput> {
  512. let absolute_path = normalize_path(path)?;
  513. let canonical_root = workspace_root
  514. .canonicalize()
  515. .unwrap_or_else(|_| workspace_root.to_path_buf());
  516. validate_workspace_boundary(&absolute_path, &canonical_root)?;
  517. read_file(path, offset, limit)
  518. }
  519. /// Write a file with workspace boundary enforcement.
  520. #[allow(dead_code)]
  521. pub fn write_file_in_workspace(
  522. path: &str,
  523. content: &str,
  524. workspace_root: &Path,
  525. ) -> io::Result<WriteFileOutput> {
  526. let absolute_path = normalize_path_allow_missing(path)?;
  527. let canonical_root = workspace_root
  528. .canonicalize()
  529. .unwrap_or_else(|_| workspace_root.to_path_buf());
  530. validate_workspace_boundary(&absolute_path, &canonical_root)?;
  531. write_file(path, content)
  532. }
  533. /// Edit a file with workspace boundary enforcement.
  534. #[allow(dead_code)]
  535. pub fn edit_file_in_workspace(
  536. path: &str,
  537. old_string: &str,
  538. new_string: &str,
  539. replace_all: bool,
  540. workspace_root: &Path,
  541. ) -> io::Result<EditFileOutput> {
  542. let absolute_path = normalize_path(path)?;
  543. let canonical_root = workspace_root
  544. .canonicalize()
  545. .unwrap_or_else(|_| workspace_root.to_path_buf());
  546. validate_workspace_boundary(&absolute_path, &canonical_root)?;
  547. edit_file(path, old_string, new_string, replace_all)
  548. }
  549. /// Check whether a path is a symlink that resolves outside the workspace.
  550. #[allow(dead_code)]
  551. pub fn is_symlink_escape(path: &Path, workspace_root: &Path) -> io::Result<bool> {
  552. let metadata = fs::symlink_metadata(path)?;
  553. if !metadata.is_symlink() {
  554. return Ok(false);
  555. }
  556. let resolved = path.canonicalize()?;
  557. let canonical_root = workspace_root
  558. .canonicalize()
  559. .unwrap_or_else(|_| workspace_root.to_path_buf());
  560. Ok(!resolved.starts_with(&canonical_root))
  561. }
  562. #[cfg(test)]
  563. mod tests {
  564. use std::time::{SystemTime, UNIX_EPOCH};
  565. use super::{
  566. edit_file, glob_search, grep_search, is_symlink_escape, read_file, read_file_in_workspace,
  567. write_file, GrepSearchInput, MAX_WRITE_SIZE,
  568. };
  569. fn temp_path(name: &str) -> std::path::PathBuf {
  570. let unique = SystemTime::now()
  571. .duration_since(UNIX_EPOCH)
  572. .expect("time should move forward")
  573. .as_nanos();
  574. std::env::temp_dir().join(format!("clawd-native-{name}-{unique}"))
  575. }
  576. #[test]
  577. fn reads_and_writes_files() {
  578. let path = temp_path("read-write.txt");
  579. let write_output = write_file(path.to_string_lossy().as_ref(), "one\ntwo\nthree")
  580. .expect("write should succeed");
  581. assert_eq!(write_output.kind, "create");
  582. let read_output = read_file(path.to_string_lossy().as_ref(), Some(1), Some(1))
  583. .expect("read should succeed");
  584. assert_eq!(read_output.file.content, "two");
  585. }
  586. #[test]
  587. fn edits_file_contents() {
  588. let path = temp_path("edit.txt");
  589. write_file(path.to_string_lossy().as_ref(), "alpha beta alpha")
  590. .expect("initial write should succeed");
  591. let output = edit_file(path.to_string_lossy().as_ref(), "alpha", "omega", true)
  592. .expect("edit should succeed");
  593. assert!(output.replace_all);
  594. }
  595. #[test]
  596. fn rejects_binary_files() {
  597. let path = temp_path("binary-test.bin");
  598. std::fs::write(&path, b"\x00\x01\x02\x03binary content").expect("write should succeed");
  599. let result = read_file(path.to_string_lossy().as_ref(), None, None);
  600. assert!(result.is_err());
  601. let error = result.unwrap_err();
  602. assert_eq!(error.kind(), std::io::ErrorKind::InvalidData);
  603. assert!(error.to_string().contains("binary"));
  604. }
  605. #[test]
  606. fn rejects_oversized_writes() {
  607. let path = temp_path("oversize-write.txt");
  608. let huge = "x".repeat(MAX_WRITE_SIZE + 1);
  609. let result = write_file(path.to_string_lossy().as_ref(), &huge);
  610. assert!(result.is_err());
  611. let error = result.unwrap_err();
  612. assert_eq!(error.kind(), std::io::ErrorKind::InvalidData);
  613. assert!(error.to_string().contains("too large"));
  614. }
  615. #[test]
  616. fn enforces_workspace_boundary() {
  617. let workspace = temp_path("workspace-boundary");
  618. std::fs::create_dir_all(&workspace).expect("workspace dir should be created");
  619. let inside = workspace.join("inside.txt");
  620. write_file(inside.to_string_lossy().as_ref(), "safe content")
  621. .expect("write inside workspace should succeed");
  622. // Reading inside workspace should succeed
  623. let result =
  624. read_file_in_workspace(inside.to_string_lossy().as_ref(), None, None, &workspace);
  625. assert!(result.is_ok());
  626. // Reading outside workspace should fail
  627. let outside = temp_path("outside-boundary.txt");
  628. write_file(outside.to_string_lossy().as_ref(), "unsafe content")
  629. .expect("write outside should succeed");
  630. let result =
  631. read_file_in_workspace(outside.to_string_lossy().as_ref(), None, None, &workspace);
  632. assert!(result.is_err());
  633. let error = result.unwrap_err();
  634. assert_eq!(error.kind(), std::io::ErrorKind::PermissionDenied);
  635. assert!(error.to_string().contains("escapes workspace"));
  636. }
  637. #[test]
  638. fn detects_symlink_escape() {
  639. let workspace = temp_path("symlink-workspace");
  640. std::fs::create_dir_all(&workspace).expect("workspace dir should be created");
  641. let outside = temp_path("symlink-target.txt");
  642. std::fs::write(&outside, "target content").expect("target should write");
  643. let link_path = workspace.join("escape-link.txt");
  644. #[cfg(unix)]
  645. {
  646. std::os::unix::fs::symlink(&outside, &link_path).expect("symlink should create");
  647. assert!(is_symlink_escape(&link_path, &workspace).expect("check should succeed"));
  648. }
  649. // Non-symlink file should not be an escape
  650. let normal = workspace.join("normal.txt");
  651. std::fs::write(&normal, "normal content").expect("normal file should write");
  652. assert!(!is_symlink_escape(&normal, &workspace).expect("check should succeed"));
  653. }
  654. #[test]
  655. fn globs_and_greps_directory() {
  656. let dir = temp_path("search-dir");
  657. std::fs::create_dir_all(&dir).expect("directory should be created");
  658. let file = dir.join("demo.rs");
  659. write_file(
  660. file.to_string_lossy().as_ref(),
  661. "fn main() {\n println!(\"hello\");\n}\n",
  662. )
  663. .expect("file write should succeed");
  664. let globbed = glob_search("**/*.rs", Some(dir.to_string_lossy().as_ref()))
  665. .expect("glob should succeed");
  666. assert_eq!(globbed.num_files, 1);
  667. let grep_output = grep_search(&GrepSearchInput {
  668. pattern: String::from("hello"),
  669. path: Some(dir.to_string_lossy().into_owned()),
  670. glob: Some(String::from("**/*.rs")),
  671. output_mode: Some(String::from("content")),
  672. before: None,
  673. after: None,
  674. context_short: None,
  675. context: None,
  676. line_numbers: Some(true),
  677. case_insensitive: Some(false),
  678. file_type: None,
  679. head_limit: Some(10),
  680. offset: Some(0),
  681. multiline: Some(false),
  682. })
  683. .expect("grep should succeed");
  684. assert!(grep_output.content.unwrap_or_default().contains("hello"));
  685. }
  686. }