před 3 měsíci · 5b106b840d
--- a/rust/Cargo.lock
+++ b/rust/Cargo.lock
@@ -212,6 +212,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 
				 checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
			
 
				 dependencies = [
			
 
				  "futures-core",
			
 
				+ "futures-sink",
			
 
				 ]
			
 
				 
			
 
				 [[package]]
			
@@ -220,6 +221,18 @@ version = "0.3.32"
 
				 source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				 checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
			
 
				 
			
 
				+[[package]]
			
 
				+name = "futures-io"
			
 
				+version = "0.3.32"
			
 
				+source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
			
 
				+
			
 
				+[[package]]
			
 
				+name = "futures-sink"
			
 
				+version = "0.3.32"
			
 
				+source = "registry+https://github.com/rust-lang/crates.io-index"
			
 
				+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
			
 
				+
			
 
				 [[package]]
			
 
				 name = "futures-task"
			
 
				 version = "0.3.32"
			
@@ -233,7 +246,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 
				 checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
			
 
				 dependencies = [
			
 
				  "futures-core",
			
 
				+ "futures-io",
			
 
				+ "futures-sink",
			
 
				  "futures-task",
			
 
				+ "memchr",
			
 
				  "pin-project-lite",
			
 
				  "slab",
			
 
				 ]
			
@@ -898,7 +914,9 @@ checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
 
				 dependencies = [
			
 
				  "base64",
			
 
				  "bytes",
			
 
				+ "futures-channel",
			
 
				  "futures-core",
			
 
				+ "futures-util",
			
 
				  "http",
			
 
				  "http-body",
			
 
				  "http-body-util",
			
@@ -1352,6 +1370,7 @@ dependencies = [
 
				 name = "tools"
			
 
				 version = "0.1.0"
			
 
				 dependencies = [
			
 
				+ "reqwest",
			
 
				  "runtime",
			
 
				  "serde",
			
 
				  "serde_json",
			
--- a/rust/crates/tools/Cargo.toml
+++ b/rust/crates/tools/Cargo.toml
@@ -7,6 +7,7 @@ publish.workspace = true
 
				 
			
 
				 [dependencies]
			
 
				 runtime = { path = "../runtime" }
			
 
				+reqwest = { version = "0.12", default-features = false, features = ["blocking", "rustls-tls"] }
			
 
				 serde = { version = "1", features = ["derive"] }
			
 
				 serde_json = "1"
			
 
				 
			
--- a/rust/crates/tools/src/lib.rs
+++ b/rust/crates/tools/src/lib.rs
@@ -1,8 +1,12 @@
 
				+use std::collections::BTreeSet;
			
 
				+use std::time::{Duration, Instant};
			
 
				+
			
 
				+use reqwest::blocking::Client;
			
 
				 use runtime::{
			
 
				     edit_file, execute_bash, glob_search, grep_search, read_file, write_file, BashCommandInput,
			
 
				     GrepSearchInput,
			
 
				 };
			
 
				-use serde::Deserialize;
			
 
				+use serde::{Deserialize, Serialize};
			
 
				 use serde_json::{json, Value};
			
 
				 
			
 
				 #[derive(Debug, Clone, PartialEq, Eq)]
			
@@ -140,6 +144,40 @@ pub fn mvp_tool_specs() -> Vec<ToolSpec> {
 
				                 "additionalProperties": false
			
 
				             }),
			
 
				         },
			
 
				+        ToolSpec {
			
 
				+            name: "WebFetch",
			
 
				+            description:
			
 
				+                "Fetch a URL, convert it into readable text, and answer a prompt about it.",
			
 
				+            input_schema: json!({
			
 
				+                "type": "object",
			
 
				+                "properties": {
			
 
				+                    "url": { "type": "string", "format": "uri" },
			
 
				+                    "prompt": { "type": "string" }
			
 
				+                },
			
 
				+                "required": ["url", "prompt"],
			
 
				+                "additionalProperties": false
			
 
				+            }),
			
 
				+        },
			
 
				+        ToolSpec {
			
 
				+            name: "WebSearch",
			
 
				+            description: "Search the web for current information and return cited results.",
			
 
				+            input_schema: json!({
			
 
				+                "type": "object",
			
 
				+                "properties": {
			
 
				+                    "query": { "type": "string", "minLength": 2 },
			
 
				+                    "allowed_domains": {
			
 
				+                        "type": "array",
			
 
				+                        "items": { "type": "string" }
			
 
				+                    },
			
 
				+                    "blocked_domains": {
			
 
				+                        "type": "array",
			
 
				+                        "items": { "type": "string" }
			
 
				+                    }
			
 
				+                },
			
 
				+                "required": ["query"],
			
 
				+                "additionalProperties": false
			
 
				+            }),
			
 
				+        },
			
 
				     ]
			
 
				 }
			
 
				 
			
@@ -151,6 +189,8 @@ pub fn execute_tool(name: &str, input: &Value) -> Result<String, String> {
 
				         "edit_file" => from_value::<EditFileInput>(input).and_then(run_edit_file),
			
 
				         "glob_search" => from_value::<GlobSearchInputValue>(input).and_then(run_glob_search),
			
 
				         "grep_search" => from_value::<GrepSearchInput>(input).and_then(run_grep_search),
			
 
				+        "WebFetch" => from_value::<WebFetchInput>(input).and_then(run_web_fetch),
			
 
				+        "WebSearch" => from_value::<WebSearchInput>(input).and_then(run_web_search),
			
 
				         _ => Err(format!("unsupported tool: {name}")),
			
 
				     }
			
 
				 }
			
@@ -192,6 +232,14 @@ fn run_grep_search(input: GrepSearchInput) -> Result<String, String> {
 
				     to_pretty_json(grep_search(&input).map_err(io_to_string)?)
			
 
				 }
			
 
				 
			
 
				+fn run_web_fetch(input: WebFetchInput) -> Result<String, String> {
			
 
				+    to_pretty_json(execute_web_fetch(&input)?)
			
 
				+}
			
 
				+
			
 
				+fn run_web_search(input: WebSearchInput) -> Result<String, String> {
			
 
				+    to_pretty_json(execute_web_search(&input)?)
			
 
				+}
			
 
				+
			
 
				 fn to_pretty_json<T: serde::Serialize>(value: T) -> Result<String, String> {
			
 
				     serde_json::to_string_pretty(&value).map_err(|error| error.to_string())
			
 
				 }
			
@@ -227,8 +275,411 @@ struct GlobSearchInputValue {
 
				     path: Option<String>,
			
 
				 }
			
 
				 
			
 
				+#[derive(Debug, Deserialize)]
			
 
				+struct WebFetchInput {
			
 
				+    url: String,
			
 
				+    prompt: String,
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Deserialize)]
			
 
				+struct WebSearchInput {
			
 
				+    query: String,
			
 
				+    allowed_domains: Option<Vec<String>>,
			
 
				+    blocked_domains: Option<Vec<String>>,
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Serialize)]
			
 
				+struct WebFetchOutput {
			
 
				+    bytes: usize,
			
 
				+    code: u16,
			
 
				+    #[serde(rename = "codeText")]
			
 
				+    code_text: String,
			
 
				+    result: String,
			
 
				+    #[serde(rename = "durationMs")]
			
 
				+    duration_ms: u128,
			
 
				+    url: String,
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Serialize)]
			
 
				+struct WebSearchOutput {
			
 
				+    query: String,
			
 
				+    results: Vec<WebSearchResultItem>,
			
 
				+    #[serde(rename = "durationSeconds")]
			
 
				+    duration_seconds: f64,
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Serialize)]
			
 
				+#[serde(untagged)]
			
 
				+enum WebSearchResultItem {
			
 
				+    SearchResult {
			
 
				+        tool_use_id: String,
			
 
				+        content: Vec<SearchHit>,
			
 
				+    },
			
 
				+    Commentary(String),
			
 
				+}
			
 
				+
			
 
				+#[derive(Debug, Serialize)]
			
 
				+struct SearchHit {
			
 
				+    title: String,
			
 
				+    url: String,
			
 
				+}
			
 
				+
			
 
				+fn execute_web_fetch(input: &WebFetchInput) -> Result<WebFetchOutput, String> {
			
 
				+    let started = Instant::now();
			
 
				+    let client = build_http_client()?;
			
 
				+    let request_url = normalize_fetch_url(&input.url)?;
			
 
				+    let response = client
			
 
				+        .get(request_url.clone())
			
 
				+        .send()
			
 
				+        .map_err(|error| error.to_string())?;
			
 
				+
			
 
				+    let status = response.status();
			
 
				+    let final_url = response.url().to_string();
			
 
				+    let code = status.as_u16();
			
 
				+    let code_text = status.canonical_reason().unwrap_or("Unknown").to_string();
			
 
				+    let content_type = response
			
 
				+        .headers()
			
 
				+        .get(reqwest::header::CONTENT_TYPE)
			
 
				+        .and_then(|value| value.to_str().ok())
			
 
				+        .unwrap_or_default()
			
 
				+        .to_string();
			
 
				+    let body = response.text().map_err(|error| error.to_string())?;
			
 
				+    let bytes = body.len();
			
 
				+    let normalized = normalize_fetched_content(&body, &content_type);
			
 
				+    let result = summarize_web_fetch(&final_url, &input.prompt, &normalized);
			
 
				+
			
 
				+    Ok(WebFetchOutput {
			
 
				+        bytes,
			
 
				+        code,
			
 
				+        code_text,
			
 
				+        result,
			
 
				+        duration_ms: started.elapsed().as_millis(),
			
 
				+        url: final_url,
			
 
				+    })
			
 
				+}
			
 
				+
			
 
				+fn execute_web_search(input: &WebSearchInput) -> Result<WebSearchOutput, String> {
			
 
				+    let started = Instant::now();
			
 
				+    let client = build_http_client()?;
			
 
				+    let search_url = build_search_url(&input.query)?;
			
 
				+    let response = client
			
 
				+        .get(search_url)
			
 
				+        .send()
			
 
				+        .map_err(|error| error.to_string())?;
			
 
				+
			
 
				+    let final_url = response.url().clone();
			
 
				+    let html = response.text().map_err(|error| error.to_string())?;
			
 
				+    let mut hits = extract_search_hits(&html);
			
 
				+
			
 
				+    if hits.is_empty() && final_url.host_str().is_some() {
			
 
				+        hits = extract_search_hits_from_generic_links(&html);
			
 
				+    }
			
 
				+
			
 
				+    if let Some(allowed) = input.allowed_domains.as_ref() {
			
 
				+        hits.retain(|hit| host_matches_list(&hit.url, allowed));
			
 
				+    }
			
 
				+    if let Some(blocked) = input.blocked_domains.as_ref() {
			
 
				+        hits.retain(|hit| !host_matches_list(&hit.url, blocked));
			
 
				+    }
			
 
				+
			
 
				+    dedupe_hits(&mut hits);
			
 
				+    hits.truncate(8);
			
 
				+
			
 
				+    let summary = if hits.is_empty() {
			
 
				+        format!("No web search results matched the query {:?}.", input.query)
			
 
				+    } else {
			
 
				+        let rendered_hits = hits
			
 
				+            .iter()
			
 
				+            .map(|hit| format!("- [{}]({})", hit.title, hit.url))
			
 
				+            .collect::<Vec<_>>()
			
 
				+            .join("\n");
			
 
				+        format!(
			
 
				+            "Search results for {:?}. Include a Sources section in the final answer.\n{}",
			
 
				+            input.query, rendered_hits
			
 
				+        )
			
 
				+    };
			
 
				+
			
 
				+    Ok(WebSearchOutput {
			
 
				+        query: input.query.clone(),
			
 
				+        results: vec![
			
 
				+            WebSearchResultItem::Commentary(summary),
			
 
				+            WebSearchResultItem::SearchResult {
			
 
				+                tool_use_id: String::from("web_search_1"),
			
 
				+                content: hits,
			
 
				+            },
			
 
				+        ],
			
 
				+        duration_seconds: started.elapsed().as_secs_f64(),
			
 
				+    })
			
 
				+}
			
 
				+
			
 
				+fn build_http_client() -> Result<Client, String> {
			
 
				+    Client::builder()
			
 
				+        .timeout(Duration::from_secs(20))
			
 
				+        .redirect(reqwest::redirect::Policy::limited(10))
			
 
				+        .user_agent("clawd-rust-tools/0.1")
			
 
				+        .build()
			
 
				+        .map_err(|error| error.to_string())
			
 
				+}
			
 
				+
			
 
				+fn normalize_fetch_url(url: &str) -> Result<String, String> {
			
 
				+    let parsed = reqwest::Url::parse(url).map_err(|error| error.to_string())?;
			
 
				+    if parsed.scheme() == "http" {
			
 
				+        let host = parsed.host_str().unwrap_or_default();
			
 
				+        if host != "localhost" && host != "127.0.0.1" && host != "::1" {
			
 
				+            let mut upgraded = parsed;
			
 
				+            upgraded
			
 
				+                .set_scheme("https")
			
 
				+                .map_err(|_| String::from("failed to upgrade URL to https"))?;
			
 
				+            return Ok(upgraded.to_string());
			
 
				+        }
			
 
				+    }
			
 
				+    Ok(parsed.to_string())
			
 
				+}
			
 
				+
			
 
				+fn build_search_url(query: &str) -> Result<reqwest::Url, String> {
			
 
				+    if let Ok(base) = std::env::var("CLAWD_WEB_SEARCH_BASE_URL") {
			
 
				+        let mut url = reqwest::Url::parse(&base).map_err(|error| error.to_string())?;
			
 
				+        url.query_pairs_mut().append_pair("q", query);
			
 
				+        return Ok(url);
			
 
				+    }
			
 
				+
			
 
				+    let mut url = reqwest::Url::parse("https://html.duckduckgo.com/html/")
			
 
				+        .map_err(|error| error.to_string())?;
			
 
				+    url.query_pairs_mut().append_pair("q", query);
			
 
				+    Ok(url)
			
 
				+}
			
 
				+
			
 
				+fn normalize_fetched_content(body: &str, content_type: &str) -> String {
			
 
				+    if content_type.contains("html") {
			
 
				+        html_to_text(body)
			
 
				+    } else {
			
 
				+        body.trim().to_string()
			
 
				+    }
			
 
				+}
			
 
				+
			
 
				+fn summarize_web_fetch(url: &str, prompt: &str, content: &str) -> String {
			
 
				+    let lower_prompt = prompt.to_lowercase();
			
 
				+    let compact = collapse_whitespace(content);
			
 
				+
			
 
				+    let detail = if lower_prompt.contains("title") {
			
 
				+        extract_title(content)
			
 
				+            .map(|title| format!("Title: {title}"))
			
 
				+            .unwrap_or_else(|| preview_text(&compact, 600))
			
 
				+    } else if lower_prompt.contains("summary") || lower_prompt.contains("summarize") {
			
 
				+        preview_text(&compact, 900)
			
 
				+    } else {
			
 
				+        let preview = preview_text(&compact, 900);
			
 
				+        format!("Prompt: {prompt}\nContent preview:\n{preview}")
			
 
				+    };
			
 
				+
			
 
				+    format!("Fetched {url}\n{detail}")
			
 
				+}
			
 
				+
			
 
				+fn extract_title(content: &str) -> Option<String> {
			
 
				+    for line in content.lines() {
			
 
				+        let trimmed = line.trim();
			
 
				+        if !trimmed.is_empty() {
			
 
				+            return Some(trimmed.to_string());
			
 
				+        }
			
 
				+    }
			
 
				+    None
			
 
				+}
			
 
				+
			
 
				+fn html_to_text(html: &str) -> String {
			
 
				+    let mut text = String::with_capacity(html.len());
			
 
				+    let mut in_tag = false;
			
 
				+    let mut previous_was_space = false;
			
 
				+
			
 
				+    for ch in html.chars() {
			
 
				+        match ch {
			
 
				+            '<' => in_tag = true,
			
 
				+            '>' => in_tag = false,
			
 
				+            _ if in_tag => {}
			
 
				+            '&' => {
			
 
				+                text.push('&');
			
 
				+                previous_was_space = false;
			
 
				+            }
			
 
				+            ch if ch.is_whitespace() => {
			
 
				+                if !previous_was_space {
			
 
				+                    text.push(' ');
			
 
				+                    previous_was_space = true;
			
 
				+                }
			
 
				+            }
			
 
				+            _ => {
			
 
				+                text.push(ch);
			
 
				+                previous_was_space = false;
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    collapse_whitespace(&decode_html_entities(&text))
			
 
				+}
			
 
				+
			
 
				+fn decode_html_entities(input: &str) -> String {
			
 
				+    input
			
 
				+        .replace("&amp;", "&")
			
 
				+        .replace("&lt;", "<")
			
 
				+        .replace("&gt;", ">")
			
 
				+        .replace("&quot;", "\"")
			
 
				+        .replace("&#39;", "'")
			
 
				+        .replace("&nbsp;", " ")
			
 
				+}
			
 
				+
			
 
				+fn collapse_whitespace(input: &str) -> String {
			
 
				+    input.split_whitespace().collect::<Vec<_>>().join(" ")
			
 
				+}
			
 
				+
			
 
				+fn preview_text(input: &str, max_chars: usize) -> String {
			
 
				+    if input.chars().count() <= max_chars {
			
 
				+        return input.to_string();
			
 
				+    }
			
 
				+    let shortened = input.chars().take(max_chars).collect::<String>();
			
 
				+    format!("{}…", shortened.trim_end())
			
 
				+}
			
 
				+
			
 
				+fn extract_search_hits(html: &str) -> Vec<SearchHit> {
			
 
				+    let mut hits = Vec::new();
			
 
				+    let mut remaining = html;
			
 
				+
			
 
				+    while let Some(anchor_start) = remaining.find("result__a") {
			
 
				+        let after_class = &remaining[anchor_start..];
			
 
				+        let Some(href_idx) = after_class.find("href=") else {
			
 
				+            remaining = &after_class[1..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let href_slice = &after_class[href_idx + 5..];
			
 
				+        let Some((url, rest)) = extract_quoted_value(href_slice) else {
			
 
				+            remaining = &after_class[1..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let Some(close_tag_idx) = rest.find('>') else {
			
 
				+            remaining = &after_class[1..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let after_tag = &rest[close_tag_idx + 1..];
			
 
				+        let Some(end_anchor_idx) = after_tag.find("</a>") else {
			
 
				+            remaining = &after_tag[1..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let title = html_to_text(&after_tag[..end_anchor_idx]);
			
 
				+        if let Some(decoded_url) = decode_duckduckgo_redirect(&url) {
			
 
				+            hits.push(SearchHit {
			
 
				+                title: title.trim().to_string(),
			
 
				+                url: decoded_url,
			
 
				+            });
			
 
				+        }
			
 
				+        remaining = &after_tag[end_anchor_idx + 4..];
			
 
				+    }
			
 
				+
			
 
				+    hits
			
 
				+}
			
 
				+
			
 
				+fn extract_search_hits_from_generic_links(html: &str) -> Vec<SearchHit> {
			
 
				+    let mut hits = Vec::new();
			
 
				+    let mut remaining = html;
			
 
				+
			
 
				+    while let Some(anchor_start) = remaining.find("<a") {
			
 
				+        let after_anchor = &remaining[anchor_start..];
			
 
				+        let Some(href_idx) = after_anchor.find("href=") else {
			
 
				+            remaining = &after_anchor[2..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let href_slice = &after_anchor[href_idx + 5..];
			
 
				+        let Some((url, rest)) = extract_quoted_value(href_slice) else {
			
 
				+            remaining = &after_anchor[2..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let Some(close_tag_idx) = rest.find('>') else {
			
 
				+            remaining = &after_anchor[2..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let after_tag = &rest[close_tag_idx + 1..];
			
 
				+        let Some(end_anchor_idx) = after_tag.find("</a>") else {
			
 
				+            remaining = &after_anchor[2..];
			
 
				+            continue;
			
 
				+        };
			
 
				+        let title = html_to_text(&after_tag[..end_anchor_idx]);
			
 
				+        if title.trim().is_empty() {
			
 
				+            remaining = &after_tag[end_anchor_idx + 4..];
			
 
				+            continue;
			
 
				+        }
			
 
				+        let decoded_url = decode_duckduckgo_redirect(&url).unwrap_or(url);
			
 
				+        if decoded_url.starts_with("http://") || decoded_url.starts_with("https://") {
			
 
				+            hits.push(SearchHit {
			
 
				+                title: title.trim().to_string(),
			
 
				+                url: decoded_url,
			
 
				+            });
			
 
				+        }
			
 
				+        remaining = &after_tag[end_anchor_idx + 4..];
			
 
				+    }
			
 
				+
			
 
				+    hits
			
 
				+}
			
 
				+
			
 
				+fn extract_quoted_value(input: &str) -> Option<(String, &str)> {
			
 
				+    let quote = input.chars().next()?;
			
 
				+    if quote != '"' && quote != '\'' {
			
 
				+        return None;
			
 
				+    }
			
 
				+    let rest = &input[quote.len_utf8()..];
			
 
				+    let end = rest.find(quote)?;
			
 
				+    Some((rest[..end].to_string(), &rest[end + quote.len_utf8()..]))
			
 
				+}
			
 
				+
			
 
				+fn decode_duckduckgo_redirect(url: &str) -> Option<String> {
			
 
				+    if url.starts_with("http://") || url.starts_with("https://") {
			
 
				+        return Some(html_entity_decode_url(url));
			
 
				+    }
			
 
				+
			
 
				+    let joined = if url.starts_with("//") {
			
 
				+        format!("https:{url}")
			
 
				+    } else if url.starts_with('/') {
			
 
				+        format!("https://duckduckgo.com{url}")
			
 
				+    } else {
			
 
				+        return None;
			
 
				+    };
			
 
				+
			
 
				+    let parsed = reqwest::Url::parse(&joined).ok()?;
			
 
				+    if parsed.path() == "/l/" || parsed.path() == "/l" {
			
 
				+        for (key, value) in parsed.query_pairs() {
			
 
				+            if key == "uddg" {
			
 
				+                return Some(html_entity_decode_url(value.as_ref()));
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+    Some(joined)
			
 
				+}
			
 
				+
			
 
				+fn html_entity_decode_url(url: &str) -> String {
			
 
				+    decode_html_entities(url)
			
 
				+}
			
 
				+
			
 
				+fn host_matches_list(url: &str, domains: &[String]) -> bool {
			
 
				+    let Ok(parsed) = reqwest::Url::parse(url) else {
			
 
				+        return false;
			
 
				+    };
			
 
				+    let Some(host) = parsed.host_str() else {
			
 
				+        return false;
			
 
				+    };
			
 
				+    domains.iter().any(|domain| {
			
 
				+        let normalized = domain.trim().trim_start_matches('.');
			
 
				+        host == normalized || host.ends_with(&format!(".{normalized}"))
			
 
				+    })
			
 
				+}
			
 
				+
			
 
				+fn dedupe_hits(hits: &mut Vec<SearchHit>) {
			
 
				+    let mut seen = BTreeSet::new();
			
 
				+    hits.retain(|hit| seen.insert(hit.url.clone()));
			
 
				+}
			
 
				+
			
 
				 #[cfg(test)]
			
 
				 mod tests {
			
 
				+    use std::io::{Read, Write};
			
 
				+    use std::net::{SocketAddr, TcpListener};
			
 
				+    use std::sync::Arc;
			
 
				+    use std::thread;
			
 
				+    use std::time::Duration;
			
 
				+
			
 
				     use super::{execute_tool, mvp_tool_specs};
			
 
				     use serde_json::json;
			
 
				 
			
@@ -240,6 +691,8 @@ mod tests {
 
				             .collect::<Vec<_>>();
			
 
				         assert!(names.contains(&"bash"));
			
 
				         assert!(names.contains(&"read_file"));
			
 
				+        assert!(names.contains(&"WebFetch"));
			
 
				+        assert!(names.contains(&"WebSearch"));
			
 
				     }
			
 
				 
			
 
				     #[test]
			
@@ -247,4 +700,167 @@ mod tests {
 
				         let error = execute_tool("nope", &json!({})).expect_err("tool should be rejected");
			
 
				         assert!(error.contains("unsupported tool"));
			
 
				     }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn web_fetch_returns_prompt_aware_summary() {
			
 
				+        let server = TestServer::spawn(Arc::new(|request_line: &str| {
			
 
				+            assert!(request_line.starts_with("GET /page "));
			
 
				+            HttpResponse::html(
			
 
				+                200,
			
 
				+                "OK",
			
 
				+                "<html><head><title>Ignored</title></head><body><h1>Test Page</h1><p>Hello <b>world</b> from local server.</p></body></html>",
			
 
				+            )
			
 
				+        }));
			
 
				+
			
 
				+        let result = execute_tool(
			
 
				+            "WebFetch",
			
 
				+            &json!({
			
 
				+                "url": format!("http://{}/page", server.addr()),
			
 
				+                "prompt": "Summarize this page"
			
 
				+            }),
			
 
				+        )
			
 
				+        .expect("WebFetch should succeed");
			
 
				+
			
 
				+        let output: serde_json::Value = serde_json::from_str(&result).expect("valid json");
			
 
				+        assert_eq!(output["code"], 200);
			
 
				+        let summary = output["result"].as_str().expect("result string");
			
 
				+        assert!(summary.contains("Fetched"));
			
 
				+        assert!(summary.contains("Test Page"));
			
 
				+        assert!(summary.contains("Hello world from local server"));
			
 
				+    }
			
 
				+
			
 
				+    #[test]
			
 
				+    fn web_search_extracts_and_filters_results() {
			
 
				+        let server = TestServer::spawn(Arc::new(|request_line: &str| {
			
 
				+            assert!(request_line.contains("GET /search?q=rust+web+search "));
			
 
				+            HttpResponse::html(
			
 
				+                200,
			
 
				+                "OK",
			
 
				+                r#"
			
 
				+                <html><body>
			
 
				+                  <a class="result__a" href="https://docs.rs/reqwest">Reqwest docs</a>
			
 
				+                  <a class="result__a" href="https://example.com/blocked">Blocked result</a>
			
 
				+                </body></html>
			
 
				+                "#,
			
 
				+            )
			
 
				+        }));
			
 
				+
			
 
				+        std::env::set_var(
			
 
				+            "CLAWD_WEB_SEARCH_BASE_URL",
			
 
				+            format!("http://{}/search", server.addr()),
			
 
				+        );
			
 
				+        let result = execute_tool(
			
 
				+            "WebSearch",
			
 
				+            &json!({
			
 
				+                "query": "rust web search",
			
 
				+                "allowed_domains": ["docs.rs"],
			
 
				+                "blocked_domains": ["example.com"]
			
 
				+            }),
			
 
				+        )
			
 
				+        .expect("WebSearch should succeed");
			
 
				+        std::env::remove_var("CLAWD_WEB_SEARCH_BASE_URL");
			
 
				+
			
 
				+        let output: serde_json::Value = serde_json::from_str(&result).expect("valid json");
			
 
				+        assert_eq!(output["query"], "rust web search");
			
 
				+        let results = output["results"].as_array().expect("results array");
			
 
				+        let search_result = results
			
 
				+            .iter()
			
 
				+            .find(|item| item.get("content").is_some())
			
 
				+            .expect("search result block present");
			
 
				+        let content = search_result["content"].as_array().expect("content array");
			
 
				+        assert_eq!(content.len(), 1);
			
 
				+        assert_eq!(content[0]["title"], "Reqwest docs");
			
 
				+        assert_eq!(content[0]["url"], "https://docs.rs/reqwest");
			
 
				+    }
			
 
				+
			
 
				+    struct TestServer {
			
 
				+        addr: SocketAddr,
			
 
				+        shutdown: Option<std::sync::mpsc::Sender<()>>,
			
 
				+        handle: Option<thread::JoinHandle<()>>,
			
 
				+    }
			
 
				+
			
 
				+    impl TestServer {
			
 
				+        fn spawn(handler: Arc<dyn Fn(&str) -> HttpResponse + Send + Sync + 'static>) -> Self {
			
 
				+            let listener = TcpListener::bind("127.0.0.1:0").expect("bind test server");
			
 
				+            listener
			
 
				+                .set_nonblocking(true)
			
 
				+                .expect("set nonblocking listener");
			
 
				+            let addr = listener.local_addr().expect("local addr");
			
 
				+            let (tx, rx) = std::sync::mpsc::channel::<()>();
			
 
				+
			
 
				+            let handle = thread::spawn(move || loop {
			
 
				+                if rx.try_recv().is_ok() {
			
 
				+                    break;
			
 
				+                }
			
 
				+
			
 
				+                match listener.accept() {
			
 
				+                    Ok((mut stream, _)) => {
			
 
				+                        let mut buffer = [0_u8; 4096];
			
 
				+                        let size = stream.read(&mut buffer).expect("read request");
			
 
				+                        let request = String::from_utf8_lossy(&buffer[..size]).into_owned();
			
 
				+                        let request_line = request.lines().next().unwrap_or_default().to_string();
			
 
				+                        let response = handler(&request_line);
			
 
				+                        stream
			
 
				+                            .write_all(response.to_bytes().as_slice())
			
 
				+                            .expect("write response");
			
 
				+                    }
			
 
				+                    Err(error) if error.kind() == std::io::ErrorKind::WouldBlock => {
			
 
				+                        thread::sleep(Duration::from_millis(10));
			
 
				+                    }
			
 
				+                    Err(error) => panic!("server accept failed: {error}"),
			
 
				+                }
			
 
				+            });
			
 
				+
			
 
				+            Self {
			
 
				+                addr,
			
 
				+                shutdown: Some(tx),
			
 
				+                handle: Some(handle),
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        fn addr(&self) -> SocketAddr {
			
 
				+            self.addr
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    impl Drop for TestServer {
			
 
				+        fn drop(&mut self) {
			
 
				+            if let Some(tx) = self.shutdown.take() {
			
 
				+                let _ = tx.send(());
			
 
				+            }
			
 
				+            if let Some(handle) = self.handle.take() {
			
 
				+                handle.join().expect("join test server");
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				+
			
 
				+    struct HttpResponse {
			
 
				+        status: u16,
			
 
				+        reason: &'static str,
			
 
				+        content_type: &'static str,
			
 
				+        body: String,
			
 
				+    }
			
 
				+
			
 
				+    impl HttpResponse {
			
 
				+        fn html(status: u16, reason: &'static str, body: &str) -> Self {
			
 
				+            Self {
			
 
				+                status,
			
 
				+                reason,
			
 
				+                content_type: "text/html; charset=utf-8",
			
 
				+                body: body.to_string(),
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        fn to_bytes(&self) -> Vec<u8> {
			
 
				+            format!(
			
 
				+                "HTTP/1.1 {} {}\r\nContent-Type: {}\r\nContent-Length: {}\r\nConnection: close\r\n\r\n{}",
			
 
				+                self.status,
			
 
				+                self.reason,
			
 
				+                self.content_type,
			
 
				+                self.body.len(),
			
 
				+                self.body
			
 
				+            )
			
 
				+            .into_bytes()
			
 
				+        }
			
 
				+    }
			
 
				 }