file_ops.rs 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. use std::cmp::Reverse;
  2. use std::fs;
  3. use std::io;
  4. use std::path::{Path, PathBuf};
  5. use std::time::Instant;
  6. use glob::Pattern;
  7. use regex::RegexBuilder;
  8. use serde::{Deserialize, Serialize};
  9. use walkdir::WalkDir;
  10. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  11. pub struct TextFilePayload {
  12. #[serde(rename = "filePath")]
  13. pub file_path: String,
  14. pub content: String,
  15. #[serde(rename = "numLines")]
  16. pub num_lines: usize,
  17. #[serde(rename = "startLine")]
  18. pub start_line: usize,
  19. #[serde(rename = "totalLines")]
  20. pub total_lines: usize,
  21. }
  22. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  23. pub struct ReadFileOutput {
  24. #[serde(rename = "type")]
  25. pub kind: String,
  26. pub file: TextFilePayload,
  27. }
  28. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  29. pub struct StructuredPatchHunk {
  30. #[serde(rename = "oldStart")]
  31. pub old_start: usize,
  32. #[serde(rename = "oldLines")]
  33. pub old_lines: usize,
  34. #[serde(rename = "newStart")]
  35. pub new_start: usize,
  36. #[serde(rename = "newLines")]
  37. pub new_lines: usize,
  38. pub lines: Vec<String>,
  39. }
  40. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  41. pub struct WriteFileOutput {
  42. #[serde(rename = "type")]
  43. pub kind: String,
  44. #[serde(rename = "filePath")]
  45. pub file_path: String,
  46. pub content: String,
  47. #[serde(rename = "structuredPatch")]
  48. pub structured_patch: Vec<StructuredPatchHunk>,
  49. #[serde(rename = "originalFile")]
  50. pub original_file: Option<String>,
  51. #[serde(rename = "gitDiff")]
  52. pub git_diff: Option<serde_json::Value>,
  53. }
  54. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  55. pub struct EditFileOutput {
  56. #[serde(rename = "filePath")]
  57. pub file_path: String,
  58. #[serde(rename = "oldString")]
  59. pub old_string: String,
  60. #[serde(rename = "newString")]
  61. pub new_string: String,
  62. #[serde(rename = "originalFile")]
  63. pub original_file: String,
  64. #[serde(rename = "structuredPatch")]
  65. pub structured_patch: Vec<StructuredPatchHunk>,
  66. #[serde(rename = "userModified")]
  67. pub user_modified: bool,
  68. #[serde(rename = "replaceAll")]
  69. pub replace_all: bool,
  70. #[serde(rename = "gitDiff")]
  71. pub git_diff: Option<serde_json::Value>,
  72. }
  73. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
  74. pub struct GlobSearchOutput {
  75. #[serde(rename = "durationMs")]
  76. pub duration_ms: u128,
  77. #[serde(rename = "numFiles")]
  78. pub num_files: usize,
  79. pub filenames: Vec<String>,
  80. pub truncated: bool,
  81. }
  82. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  83. pub struct GrepSearchInput {
  84. pub pattern: String,
  85. pub path: Option<String>,
  86. pub glob: Option<String>,
  87. #[serde(rename = "output_mode")]
  88. pub output_mode: Option<String>,
  89. #[serde(rename = "-B")]
  90. pub before: Option<usize>,
  91. #[serde(rename = "-A")]
  92. pub after: Option<usize>,
  93. #[serde(rename = "-C")]
  94. pub context_short: Option<usize>,
  95. pub context: Option<usize>,
  96. #[serde(rename = "-n")]
  97. pub line_numbers: Option<bool>,
  98. #[serde(rename = "-i")]
  99. pub case_insensitive: Option<bool>,
  100. #[serde(rename = "type")]
  101. pub file_type: Option<String>,
  102. pub head_limit: Option<usize>,
  103. pub offset: Option<usize>,
  104. pub multiline: Option<bool>,
  105. }
  106. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
  107. pub struct GrepSearchOutput {
  108. pub mode: Option<String>,
  109. #[serde(rename = "numFiles")]
  110. pub num_files: usize,
  111. pub filenames: Vec<String>,
  112. pub content: Option<String>,
  113. #[serde(rename = "numLines")]
  114. pub num_lines: Option<usize>,
  115. #[serde(rename = "numMatches")]
  116. pub num_matches: Option<usize>,
  117. #[serde(rename = "appliedLimit")]
  118. pub applied_limit: Option<usize>,
  119. #[serde(rename = "appliedOffset")]
  120. pub applied_offset: Option<usize>,
  121. }
  122. pub fn read_file(
  123. path: &str,
  124. offset: Option<usize>,
  125. limit: Option<usize>,
  126. ) -> io::Result<ReadFileOutput> {
  127. let absolute_path = normalize_path(path)?;
  128. let content = fs::read_to_string(&absolute_path)?;
  129. let lines: Vec<&str> = content.lines().collect();
  130. let start_index = offset.unwrap_or(0).min(lines.len());
  131. let end_index = limit.map_or(lines.len(), |limit| {
  132. start_index.saturating_add(limit).min(lines.len())
  133. });
  134. let selected = lines[start_index..end_index].join("\n");
  135. Ok(ReadFileOutput {
  136. kind: String::from("text"),
  137. file: TextFilePayload {
  138. file_path: absolute_path.to_string_lossy().into_owned(),
  139. content: selected,
  140. num_lines: end_index.saturating_sub(start_index),
  141. start_line: start_index.saturating_add(1),
  142. total_lines: lines.len(),
  143. },
  144. })
  145. }
  146. pub fn write_file(path: &str, content: &str) -> io::Result<WriteFileOutput> {
  147. let absolute_path = normalize_path_allow_missing(path)?;
  148. let original_file = fs::read_to_string(&absolute_path).ok();
  149. if let Some(parent) = absolute_path.parent() {
  150. fs::create_dir_all(parent)?;
  151. }
  152. fs::write(&absolute_path, content)?;
  153. Ok(WriteFileOutput {
  154. kind: if original_file.is_some() {
  155. String::from("update")
  156. } else {
  157. String::from("create")
  158. },
  159. file_path: absolute_path.to_string_lossy().into_owned(),
  160. content: content.to_owned(),
  161. structured_patch: make_patch(original_file.as_deref().unwrap_or(""), content),
  162. original_file,
  163. git_diff: None,
  164. })
  165. }
  166. pub fn edit_file(
  167. path: &str,
  168. old_string: &str,
  169. new_string: &str,
  170. replace_all: bool,
  171. ) -> io::Result<EditFileOutput> {
  172. let absolute_path = normalize_path(path)?;
  173. let original_file = fs::read_to_string(&absolute_path)?;
  174. if old_string == new_string {
  175. return Err(io::Error::new(
  176. io::ErrorKind::InvalidInput,
  177. "old_string and new_string must differ",
  178. ));
  179. }
  180. if !original_file.contains(old_string) {
  181. return Err(io::Error::new(
  182. io::ErrorKind::NotFound,
  183. "old_string not found in file",
  184. ));
  185. }
  186. let updated = if replace_all {
  187. original_file.replace(old_string, new_string)
  188. } else {
  189. original_file.replacen(old_string, new_string, 1)
  190. };
  191. fs::write(&absolute_path, &updated)?;
  192. Ok(EditFileOutput {
  193. file_path: absolute_path.to_string_lossy().into_owned(),
  194. old_string: old_string.to_owned(),
  195. new_string: new_string.to_owned(),
  196. original_file: original_file.clone(),
  197. structured_patch: make_patch(&original_file, &updated),
  198. user_modified: false,
  199. replace_all,
  200. git_diff: None,
  201. })
  202. }
  203. pub fn glob_search(pattern: &str, path: Option<&str>) -> io::Result<GlobSearchOutput> {
  204. let started = Instant::now();
  205. let base_dir = path
  206. .map(normalize_path)
  207. .transpose()?
  208. .unwrap_or(std::env::current_dir()?);
  209. let search_pattern = if Path::new(pattern).is_absolute() {
  210. pattern.to_owned()
  211. } else {
  212. base_dir.join(pattern).to_string_lossy().into_owned()
  213. };
  214. let mut matches = Vec::new();
  215. let entries = glob::glob(&search_pattern)
  216. .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?;
  217. for entry in entries.flatten() {
  218. if entry.is_file() {
  219. matches.push(entry);
  220. }
  221. }
  222. matches.sort_by_key(|path| {
  223. fs::metadata(path)
  224. .and_then(|metadata| metadata.modified())
  225. .ok()
  226. .map(Reverse)
  227. });
  228. let truncated = matches.len() > 100;
  229. let filenames = matches
  230. .into_iter()
  231. .take(100)
  232. .map(|path| path.to_string_lossy().into_owned())
  233. .collect::<Vec<_>>();
  234. Ok(GlobSearchOutput {
  235. duration_ms: started.elapsed().as_millis(),
  236. num_files: filenames.len(),
  237. filenames,
  238. truncated,
  239. })
  240. }
  241. pub fn grep_search(input: &GrepSearchInput) -> io::Result<GrepSearchOutput> {
  242. let base_path = input
  243. .path
  244. .as_deref()
  245. .map(normalize_path)
  246. .transpose()?
  247. .unwrap_or(std::env::current_dir()?);
  248. let regex = RegexBuilder::new(&input.pattern)
  249. .case_insensitive(input.case_insensitive.unwrap_or(false))
  250. .dot_matches_new_line(input.multiline.unwrap_or(false))
  251. .build()
  252. .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?;
  253. let glob_filter = input
  254. .glob
  255. .as_deref()
  256. .map(Pattern::new)
  257. .transpose()
  258. .map_err(|error| io::Error::new(io::ErrorKind::InvalidInput, error.to_string()))?;
  259. let file_type = input.file_type.as_deref();
  260. let output_mode = input
  261. .output_mode
  262. .clone()
  263. .unwrap_or_else(|| String::from("files_with_matches"));
  264. let context = input.context.or(input.context_short).unwrap_or(0);
  265. let mut filenames = Vec::new();
  266. let mut content_lines = Vec::new();
  267. let mut total_matches = 0usize;
  268. for file_path in collect_search_files(&base_path)? {
  269. if !matches_optional_filters(&file_path, glob_filter.as_ref(), file_type) {
  270. continue;
  271. }
  272. let Ok(file_contents) = fs::read_to_string(&file_path) else {
  273. continue;
  274. };
  275. if output_mode == "count" {
  276. let count = regex.find_iter(&file_contents).count();
  277. if count > 0 {
  278. filenames.push(file_path.to_string_lossy().into_owned());
  279. total_matches += count;
  280. }
  281. continue;
  282. }
  283. let lines: Vec<&str> = file_contents.lines().collect();
  284. let mut matched_lines = Vec::new();
  285. for (index, line) in lines.iter().enumerate() {
  286. if regex.is_match(line) {
  287. total_matches += 1;
  288. matched_lines.push(index);
  289. }
  290. }
  291. if matched_lines.is_empty() {
  292. continue;
  293. }
  294. filenames.push(file_path.to_string_lossy().into_owned());
  295. if output_mode == "content" {
  296. for index in matched_lines {
  297. let start = index.saturating_sub(input.before.unwrap_or(context));
  298. let end = (index + input.after.unwrap_or(context) + 1).min(lines.len());
  299. for (current, line) in lines.iter().enumerate().take(end).skip(start) {
  300. let prefix = if input.line_numbers.unwrap_or(true) {
  301. format!("{}:{}:", file_path.to_string_lossy(), current + 1)
  302. } else {
  303. format!("{}:", file_path.to_string_lossy())
  304. };
  305. content_lines.push(format!("{prefix}{line}"));
  306. }
  307. }
  308. }
  309. }
  310. let (filenames, applied_limit, applied_offset) =
  311. apply_limit(filenames, input.head_limit, input.offset);
  312. let content_output = if output_mode == "content" {
  313. let (lines, limit, offset) = apply_limit(content_lines, input.head_limit, input.offset);
  314. return Ok(GrepSearchOutput {
  315. mode: Some(output_mode),
  316. num_files: filenames.len(),
  317. filenames,
  318. num_lines: Some(lines.len()),
  319. content: Some(lines.join("\n")),
  320. num_matches: None,
  321. applied_limit: limit,
  322. applied_offset: offset,
  323. });
  324. } else {
  325. None
  326. };
  327. Ok(GrepSearchOutput {
  328. mode: Some(output_mode.clone()),
  329. num_files: filenames.len(),
  330. filenames,
  331. content: content_output,
  332. num_lines: None,
  333. num_matches: (output_mode == "count").then_some(total_matches),
  334. applied_limit,
  335. applied_offset,
  336. })
  337. }
  338. fn collect_search_files(base_path: &Path) -> io::Result<Vec<PathBuf>> {
  339. if base_path.is_file() {
  340. return Ok(vec![base_path.to_path_buf()]);
  341. }
  342. let mut files = Vec::new();
  343. for entry in WalkDir::new(base_path) {
  344. let entry = entry.map_err(|error| io::Error::other(error.to_string()))?;
  345. if entry.file_type().is_file() {
  346. files.push(entry.path().to_path_buf());
  347. }
  348. }
  349. Ok(files)
  350. }
  351. fn matches_optional_filters(
  352. path: &Path,
  353. glob_filter: Option<&Pattern>,
  354. file_type: Option<&str>,
  355. ) -> bool {
  356. if let Some(glob_filter) = glob_filter {
  357. let path_string = path.to_string_lossy();
  358. if !glob_filter.matches(&path_string) && !glob_filter.matches_path(path) {
  359. return false;
  360. }
  361. }
  362. if let Some(file_type) = file_type {
  363. let extension = path.extension().and_then(|extension| extension.to_str());
  364. if extension != Some(file_type) {
  365. return false;
  366. }
  367. }
  368. true
  369. }
  /// Applies an offset and a head limit to a list of results.
  ///
  /// The first `offset` items are dropped (default 0), then at most `limit`
  /// items are kept (default 250). A `limit` of 0 disables the cap.
  ///
  /// Returns the remaining items, the limit if it actually cut results off
  /// (`None` otherwise), and the offset if it was non-zero (`None` otherwise).
  fn apply_limit<T>(
      items: Vec<T>,
      limit: Option<usize>,
      offset: Option<usize>,
  ) -> (Vec<T>, Option<usize>, Option<usize>) {
      let skipped = offset.unwrap_or(0);
      let reported_offset = (skipped > 0).then_some(skipped);

      let mut remaining = items;
      let cut = skipped.min(remaining.len());
      remaining.drain(..cut);

      match limit.unwrap_or(250) {
          // Zero means "no cap": everything after the offset is returned.
          0 => (remaining, None, reported_offset),
          cap => {
              let reported_limit = (remaining.len() > cap).then_some(cap);
              remaining.truncate(cap);
              (remaining, reported_limit, reported_offset)
          }
      }
  }
  389. fn make_patch(original: &str, updated: &str) -> Vec<StructuredPatchHunk> {
  390. let mut lines = Vec::new();
  391. for line in original.lines() {
  392. lines.push(format!("-{line}"));
  393. }
  394. for line in updated.lines() {
  395. lines.push(format!("+{line}"));
  396. }
  397. vec![StructuredPatchHunk {
  398. old_start: 1,
  399. old_lines: original.lines().count(),
  400. new_start: 1,
  401. new_lines: updated.lines().count(),
  402. lines,
  403. }]
  404. }
  /// Resolves `path` to a canonical absolute path.
  ///
  /// Relative paths are interpreted against the current working directory.
  ///
  /// # Errors
  ///
  /// Fails when the current directory cannot be determined (relative input
  /// only) or when canonicalization fails, e.g. because the path does not exist.
  fn normalize_path(path: &str) -> io::Result<PathBuf> {
      let requested = Path::new(path);
      if requested.is_absolute() {
          requested.canonicalize()
      } else {
          std::env::current_dir()?.join(requested).canonicalize()
      }
  }
  /// Resolves `path` to an absolute path, tolerating a target that does not
  /// exist yet.
  ///
  /// Relative paths are interpreted against the current working directory. When
  /// the full path can be canonicalized, that result is returned. Otherwise the
  /// parent directory is canonicalized if possible (left unchanged if not) and
  /// the final component is re-attached. A path without a parent or final
  /// component is returned as the uncanonicalized absolute candidate.
  ///
  /// # Errors
  ///
  /// Fails only when the current directory cannot be determined for a relative
  /// input.
  fn normalize_path_allow_missing(path: &str) -> io::Result<PathBuf> {
      let requested = Path::new(path);
      let candidate = if requested.is_absolute() {
          requested.to_path_buf()
      } else {
          std::env::current_dir()?.join(requested)
      };

      if let Ok(resolved) = candidate.canonicalize() {
          return Ok(resolved);
      }

      // The target is missing: canonicalize as much as possible via its parent.
      let rebuilt = candidate
          .parent()
          .zip(candidate.file_name())
          .map(|(parent, name)| {
              parent
                  .canonicalize()
                  .unwrap_or_else(|_| parent.to_path_buf())
                  .join(name)
          });
      Ok(rebuilt.unwrap_or(candidate))
  }
  #[cfg(test)]
  mod tests {
      use std::fs;
      use std::io::ErrorKind;
      use std::path::{Path, PathBuf};
      use std::time::{SystemTime, UNIX_EPOCH};

      use super::{edit_file, glob_search, grep_search, read_file, write_file, GrepSearchInput};

      /// Owns a temporary file or directory path and removes it on drop, so
      /// fixtures are cleaned up even when an assertion fails mid-test.
      struct TempGuard(PathBuf);

      impl TempGuard {
          fn path(&self) -> &Path {
              &self.0
          }

          fn as_string(&self) -> String {
              self.0.to_string_lossy().into_owned()
          }
      }

      impl Drop for TempGuard {
          fn drop(&mut self) {
              // Best-effort cleanup: the entry may never have been created.
              let _ = if self.0.is_dir() {
                  fs::remove_dir_all(&self.0)
              } else {
                  fs::remove_file(&self.0)
              };
          }
      }

      /// Builds a unique path under the system temp directory; nothing is created.
      fn temp_path(name: &str) -> TempGuard {
          let unique = SystemTime::now()
              .duration_since(UNIX_EPOCH)
              .expect("time should move forward")
              .as_nanos();
          let pid = std::process::id();
          TempGuard(std::env::temp_dir().join(format!("clawd-native-{name}-{pid}-{unique}")))
      }

      #[test]
      fn reads_and_writes_files() {
          let guard = temp_path("read-write.txt");
          let path = guard.as_string();

          let write_output = write_file(&path, "one\ntwo\nthree").expect("write should succeed");
          assert_eq!(write_output.kind, "create");
          assert_eq!(write_output.original_file, None);

          let read_output = read_file(&path, Some(1), Some(1)).expect("read should succeed");
          assert_eq!(read_output.file.content, "two");
          assert_eq!(read_output.file.num_lines, 1);
          assert_eq!(read_output.file.start_line, 2);
          assert_eq!(read_output.file.total_lines, 3);

          let rewrite_output = write_file(&path, "four").expect("rewrite should succeed");
          assert_eq!(rewrite_output.kind, "update");
          assert_eq!(
              rewrite_output.original_file.as_deref(),
              Some("one\ntwo\nthree")
          );
      }

      #[test]
      fn edits_file_contents() {
          let guard = temp_path("edit.txt");
          let path = guard.as_string();
          write_file(&path, "alpha beta alpha").expect("initial write should succeed");

          let output = edit_file(&path, "alpha", "omega", true).expect("edit should succeed");
          assert!(output.replace_all);
          assert_eq!(output.original_file, "alpha beta alpha");

          let on_disk = fs::read_to_string(guard.path()).expect("edited file should be readable");
          assert_eq!(on_disk, "omega beta omega");
      }

      #[test]
      fn rejects_invalid_edits() {
          let guard = temp_path("edit-invalid.txt");
          let path = guard.as_string();
          write_file(&path, "alpha beta").expect("initial write should succeed");

          let unchanged =
              edit_file(&path, "alpha", "alpha", false).expect_err("identical strings must fail");
          assert_eq!(unchanged.kind(), ErrorKind::InvalidInput);

          let missing =
              edit_file(&path, "gamma", "delta", false).expect_err("absent old_string must fail");
          assert_eq!(missing.kind(), ErrorKind::NotFound);

          let on_disk = fs::read_to_string(guard.path()).expect("file should still be readable");
          assert_eq!(on_disk, "alpha beta");
      }

      #[test]
      fn globs_and_greps_directory() {
          let guard = temp_path("search-dir");
          fs::create_dir_all(guard.path()).expect("directory should be created");
          let dir = guard.as_string();
          let file = guard.path().join("demo.rs");
          write_file(
              file.to_string_lossy().as_ref(),
              "fn main() {\n println!(\"hello\");\n}\n",
          )
          .expect("file write should succeed");

          let globbed = glob_search("**/*.rs", Some(&dir)).expect("glob should succeed");
          assert_eq!(globbed.num_files, 1);
          assert!(!globbed.truncated);

          let grep_output = grep_search(&GrepSearchInput {
              pattern: String::from("hello"),
              path: Some(dir.clone()),
              glob: Some(String::from("**/*.rs")),
              output_mode: Some(String::from("content")),
              before: None,
              after: None,
              context_short: None,
              context: None,
              line_numbers: Some(true),
              case_insensitive: Some(false),
              file_type: None,
              head_limit: Some(10),
              offset: Some(0),
              multiline: Some(false),
          })
          .expect("grep should succeed");
          assert_eq!(grep_output.num_files, 1);
          assert_eq!(grep_output.num_lines, Some(1));
          assert!(grep_output.content.unwrap_or_default().contains("hello"));
      }
  }