فهرست منبع

Expose structured thinking without polluting normal assistant output

Extended thinking needed to travel end-to-end through the API,
runtime, and CLI so the client can request a thinking budget,
preserve streamed reasoning blocks, and present them in a
collapsed text-first form. The implementation keeps thinking
strictly opt-in, adds a session-local toggle, and reuses the
existing flag/slash-command/reporting surfaces instead of
introducing a new UI layer.

Constraint: Existing non-thinking text/tool flows had to remain backward compatible by default
Constraint: Terminal UX needed a lightweight collapsed representation rather than an interactive TUI widget
Rejected: Heuristic CLI-only parsing of reasoning text | brittle against structured stream payloads
Rejected: Expanded raw thinking output by default | too noisy for normal assistant responses
Confidence: medium
Scope-risk: moderate
Reversibility: clean
Directive: Keep thinking blocks structurally separate from answer text unless the upstream API contract changes
Tested: cargo fmt --all; cargo clippy --workspace --all-targets -- -D warnings; cargo test -q
Not-tested: Live upstream thinking payloads against the production API contract
Yeachan-Heo 2 ماه پیش
والد
کامیت
c14196c730

+ 1 - 0
rust/crates/api/src/client.rs

@@ -912,6 +912,7 @@ mod tests {
             system: None,
             tools: None,
             tool_choice: None,
+            thinking: None,
             stream: false,
         };
 

+ 1 - 1
rust/crates/api/src/lib.rs

@@ -13,5 +13,5 @@ pub use types::{
     ContentBlockDelta, ContentBlockDeltaEvent, ContentBlockStartEvent, ContentBlockStopEvent,
     InputContentBlock, InputMessage, MessageDelta, MessageDeltaEvent, MessageRequest,
     MessageResponse, MessageStartEvent, MessageStopEvent, OutputContentBlock, StreamEvent,
-    ToolChoice, ToolDefinition, ToolResultContentBlock, Usage,
+    ThinkingConfig, ToolChoice, ToolDefinition, ToolResultContentBlock, Usage,
 };

+ 26 - 0
rust/crates/api/src/types.rs

@@ -12,6 +12,8 @@ pub struct MessageRequest {
     pub tools: Option<Vec<ToolDefinition>>,
     #[serde(skip_serializing_if = "Option::is_none")]
     pub tool_choice: Option<ToolChoice>,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub thinking: Option<ThinkingConfig>,
     #[serde(default, skip_serializing_if = "std::ops::Not::not")]
     pub stream: bool,
 }
@@ -24,6 +26,23 @@ impl MessageRequest {
     }
 }
 
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct ThinkingConfig {
+    #[serde(rename = "type")]
+    pub kind: String,
+    pub budget_tokens: u32,
+}
+
+impl ThinkingConfig {
+    #[must_use]
+    pub fn enabled(budget_tokens: u32) -> Self {
+        Self {
+            kind: "enabled".to_string(),
+            budget_tokens,
+        }
+    }
+}
+
 #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
 pub struct InputMessage {
     pub role: String,
@@ -130,6 +149,11 @@ pub enum OutputContentBlock {
     Text {
         text: String,
     },
+    Thinking {
+        thinking: String,
+        #[serde(default, skip_serializing_if = "Option::is_none")]
+        signature: Option<String>,
+    },
     ToolUse {
         id: String,
         name: String,
@@ -189,6 +213,8 @@ pub struct ContentBlockDeltaEvent {
 #[serde(tag = "type", rename_all = "snake_case")]
 pub enum ContentBlockDelta {
     TextDelta { text: String },
+    ThinkingDelta { thinking: String },
+    SignatureDelta { signature: String },
     InputJsonDelta { partial_json: String },
 }
 

+ 2 - 0
rust/crates/api/tests/client_integration.rs

@@ -258,6 +258,7 @@ async fn live_stream_smoke_test() {
             system: None,
             tools: None,
             tool_choice: None,
+            thinking: None,
             stream: false,
         })
         .await
@@ -438,6 +439,7 @@ fn sample_request(stream: bool) -> MessageRequest {
             }),
         }]),
         tool_choice: Some(ToolChoice::Auto),
+        thinking: None,
         stream,
     }
 }

+ 38 - 1
rust/crates/commands/src/lib.rs

@@ -57,6 +57,12 @@ const SLASH_COMMAND_SPECS: &[SlashCommandSpec] = &[
         argument_hint: None,
         resume_supported: true,
     },
+    SlashCommandSpec {
+        name: "thinking",
+        summary: "Show or toggle extended thinking",
+        argument_hint: Some("[on|off]"),
+        resume_supported: false,
+    },
     SlashCommandSpec {
         name: "model",
         summary: "Show or switch the active model",
@@ -136,6 +142,9 @@ pub enum SlashCommand {
     Help,
     Status,
     Compact,
+    Thinking {
+        enabled: Option<bool>,
+    },
     Model {
         model: Option<String>,
     },
@@ -180,6 +189,13 @@ impl SlashCommand {
             "help" => Self::Help,
             "status" => Self::Status,
             "compact" => Self::Compact,
+            "thinking" => Self::Thinking {
+                enabled: match parts.next() {
+                    Some("on") => Some(true),
+                    Some("off") => Some(false),
+                    Some(_) | None => None,
+                },
+            },
             "model" => Self::Model {
                 model: parts.next().map(ToOwned::to_owned),
             },
@@ -279,6 +295,7 @@ pub fn handle_slash_command(
             session: session.clone(),
         }),
         SlashCommand::Status
+        | SlashCommand::Thinking { .. }
         | SlashCommand::Model { .. }
         | SlashCommand::Permissions { .. }
         | SlashCommand::Clear { .. }
@@ -307,6 +324,22 @@ mod tests {
     fn parses_supported_slash_commands() {
         assert_eq!(SlashCommand::parse("/help"), Some(SlashCommand::Help));
         assert_eq!(SlashCommand::parse(" /status "), Some(SlashCommand::Status));
+        assert_eq!(
+            SlashCommand::parse("/thinking on"),
+            Some(SlashCommand::Thinking {
+                enabled: Some(true),
+            })
+        );
+        assert_eq!(
+            SlashCommand::parse("/thinking off"),
+            Some(SlashCommand::Thinking {
+                enabled: Some(false),
+            })
+        );
+        assert_eq!(
+            SlashCommand::parse("/thinking"),
+            Some(SlashCommand::Thinking { enabled: None })
+        );
         assert_eq!(
             SlashCommand::parse("/model claude-opus"),
             Some(SlashCommand::Model {
@@ -374,6 +407,7 @@ mod tests {
         assert!(help.contains("/help"));
         assert!(help.contains("/status"));
         assert!(help.contains("/compact"));
+        assert!(help.contains("/thinking [on|off]"));
         assert!(help.contains("/model [model]"));
         assert!(help.contains("/permissions [read-only|workspace-write|danger-full-access]"));
         assert!(help.contains("/clear [--confirm]"));
@@ -386,7 +420,7 @@ mod tests {
         assert!(help.contains("/version"));
         assert!(help.contains("/export [file]"));
         assert!(help.contains("/session [list|switch <session-id>]"));
-        assert_eq!(slash_command_specs().len(), 15);
+        assert_eq!(slash_command_specs().len(), 16);
         assert_eq!(resume_supported_slash_commands().len(), 11);
     }
 
@@ -434,6 +468,9 @@ mod tests {
         let session = Session::new();
         assert!(handle_slash_command("/unknown", &session, CompactionConfig::default()).is_none());
         assert!(handle_slash_command("/status", &session, CompactionConfig::default()).is_none());
+        assert!(
+            handle_slash_command("/thinking on", &session, CompactionConfig::default()).is_none()
+        );
         assert!(
             handle_slash_command("/model claude", &session, CompactionConfig::default()).is_none()
         );

+ 11 - 5
rust/crates/runtime/src/compact.rs

@@ -130,7 +130,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
         .filter_map(|block| match block {
             ContentBlock::ToolUse { name, .. } => Some(name.as_str()),
             ContentBlock::ToolResult { tool_name, .. } => Some(tool_name.as_str()),
-            ContentBlock::Text { .. } => None,
+            ContentBlock::Text { .. } | ContentBlock::Thinking { .. } => None,
         })
         .collect::<Vec<_>>();
     tool_names.sort_unstable();
@@ -200,6 +200,7 @@ fn summarize_messages(messages: &[ConversationMessage]) -> String {
 fn summarize_block(block: &ContentBlock) -> String {
     let raw = match block {
         ContentBlock::Text { text } => text.clone(),
+        ContentBlock::Thinking { text, .. } => format!("thinking: {text}"),
         ContentBlock::ToolUse { name, input, .. } => format!("tool_use {name}({input})"),
         ContentBlock::ToolResult {
             tool_name,
@@ -258,7 +259,7 @@ fn collect_key_files(messages: &[ConversationMessage]) -> Vec<String> {
         .iter()
         .flat_map(|message| message.blocks.iter())
         .map(|block| match block {
-            ContentBlock::Text { text } => text.as_str(),
+            ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.as_str(),
             ContentBlock::ToolUse { input, .. } => input.as_str(),
             ContentBlock::ToolResult { output, .. } => output.as_str(),
         })
@@ -280,10 +281,15 @@ fn infer_current_work(messages: &[ConversationMessage]) -> Option<String> {
 
 fn first_text_block(message: &ConversationMessage) -> Option<&str> {
     message.blocks.iter().find_map(|block| match block {
-        ContentBlock::Text { text } if !text.trim().is_empty() => Some(text.as_str()),
+        ContentBlock::Text { text } | ContentBlock::Thinking { text, .. }
+            if !text.trim().is_empty() =>
+        {
+            Some(text.as_str())
+        }
         ContentBlock::ToolUse { .. }
         | ContentBlock::ToolResult { .. }
-        | ContentBlock::Text { .. } => None,
+        | ContentBlock::Text { .. }
+        | ContentBlock::Thinking { .. } => None,
     })
 }
 
@@ -328,7 +334,7 @@ fn estimate_message_tokens(message: &ConversationMessage) -> usize {
         .blocks
         .iter()
         .map(|block| match block {
-            ContentBlock::Text { text } => text.len() / 4 + 1,
+            ContentBlock::Text { text } | ContentBlock::Thinking { text, .. } => text.len() / 4 + 1,
             ContentBlock::ToolUse { name, input, .. } => (name.len() + input.len()) / 4 + 1,
             ContentBlock::ToolResult {
                 tool_name, output, ..

+ 57 - 7
rust/crates/runtime/src/conversation.rs

@@ -17,6 +17,8 @@ pub struct ApiRequest {
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum AssistantEvent {
     TextDelta(String),
+    ThinkingDelta(String),
+    ThinkingSignature(String),
     ToolUse {
         id: String,
         name: String,
@@ -247,15 +249,26 @@ fn build_assistant_message(
     events: Vec<AssistantEvent>,
 ) -> Result<(ConversationMessage, Option<TokenUsage>), RuntimeError> {
     let mut text = String::new();
+    let mut thinking = String::new();
+    let mut thinking_signature: Option<String> = None;
     let mut blocks = Vec::new();
     let mut finished = false;
     let mut usage = None;
 
     for event in events {
         match event {
-            AssistantEvent::TextDelta(delta) => text.push_str(&delta),
+            AssistantEvent::TextDelta(delta) => {
+                flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks);
+                text.push_str(&delta);
+            }
+            AssistantEvent::ThinkingDelta(delta) => {
+                flush_text_block(&mut text, &mut blocks);
+                thinking.push_str(&delta);
+            }
+            AssistantEvent::ThinkingSignature(signature) => thinking_signature = Some(signature),
             AssistantEvent::ToolUse { id, name, input } => {
                 flush_text_block(&mut text, &mut blocks);
+                flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks);
                 blocks.push(ContentBlock::ToolUse { id, name, input });
             }
             AssistantEvent::Usage(value) => usage = Some(value),
@@ -266,6 +279,7 @@ fn build_assistant_message(
     }
 
     flush_text_block(&mut text, &mut blocks);
+    flush_thinking_block(&mut thinking, &mut thinking_signature, &mut blocks);
 
     if !finished {
         return Err(RuntimeError::new(
@@ -290,6 +304,19 @@ fn flush_text_block(text: &mut String, blocks: &mut Vec<ContentBlock>) {
     }
 }
 
+fn flush_thinking_block(
+    thinking: &mut String,
+    signature: &mut Option<String>,
+    blocks: &mut Vec<ContentBlock>,
+) {
+    if !thinking.is_empty() || signature.is_some() {
+        blocks.push(ContentBlock::Thinking {
+            text: std::mem::take(thinking),
+            signature: signature.take(),
+        });
+    }
+}
+
 type ToolHandler = Box<dyn FnMut(&str) -> Result<String, ToolError>>;
 
 #[derive(Default)]
@@ -325,8 +352,8 @@ impl ToolExecutor for StaticToolExecutor {
 #[cfg(test)]
 mod tests {
     use super::{
-        ApiClient, ApiRequest, AssistantEvent, ConversationRuntime, RuntimeError,
-        StaticToolExecutor,
+        build_assistant_message, ApiClient, ApiRequest, AssistantEvent, ConversationRuntime,
+        RuntimeError, StaticToolExecutor,
     };
     use crate::compact::CompactionConfig;
     use crate::permissions::{
@@ -408,7 +435,7 @@ mod tests {
                 .sum::<i32>();
             Ok(total.to_string())
         });
-        let permission_policy = PermissionPolicy::new(PermissionMode::Prompt);
+        let permission_policy = PermissionPolicy::new(PermissionMode::WorkspaceWrite);
         let system_prompt = SystemPromptBuilder::new()
             .with_project_context(ProjectContext {
                 cwd: PathBuf::from("/tmp/project"),
@@ -487,7 +514,7 @@ mod tests {
             Session::new(),
             SingleCallApiClient,
             StaticToolExecutor::new(),
-            PermissionPolicy::new(PermissionMode::Prompt),
+            PermissionPolicy::new(PermissionMode::WorkspaceWrite),
             vec!["system".to_string()],
         );
 
@@ -502,6 +529,29 @@ mod tests {
         ));
     }
 
+    #[test]
+    fn thinking_blocks_are_preserved_separately_from_text() {
+        let (message, usage) = build_assistant_message(vec![
+            AssistantEvent::ThinkingDelta("first ".to_string()),
+            AssistantEvent::ThinkingDelta("second".to_string()),
+            AssistantEvent::ThinkingSignature("sig-1".to_string()),
+            AssistantEvent::TextDelta("final".to_string()),
+            AssistantEvent::MessageStop,
+        ])
+        .expect("assistant message should build");
+
+        assert_eq!(usage, None);
+        assert!(matches!(
+            &message.blocks[0],
+            ContentBlock::Thinking { text, signature }
+                if text == "first second" && signature.as_deref() == Some("sig-1")
+        ));
+        assert!(matches!(
+            &message.blocks[1],
+            ContentBlock::Text { text } if text == "final"
+        ));
+    }
+
     #[test]
     fn reconstructs_usage_tracker_from_restored_session() {
         struct SimpleApi;
@@ -536,7 +586,7 @@ mod tests {
             session,
             SimpleApi,
             StaticToolExecutor::new(),
-            PermissionPolicy::new(PermissionMode::Allow),
+            PermissionPolicy::new(PermissionMode::DangerFullAccess),
             vec!["system".to_string()],
         );
 
@@ -563,7 +613,7 @@ mod tests {
             Session::new(),
             SimpleApi,
             StaticToolExecutor::new(),
-            PermissionPolicy::new(PermissionMode::Allow),
+            PermissionPolicy::new(PermissionMode::DangerFullAccess),
             vec!["system".to_string()],
         );
         runtime.run_turn("a", None).expect("turn a");

+ 24 - 0
rust/crates/runtime/src/session.rs

@@ -19,6 +19,10 @@ pub enum ContentBlock {
     Text {
         text: String,
     },
+    Thinking {
+        text: String,
+        signature: Option<String>,
+    },
     ToolUse {
         id: String,
         name: String,
@@ -257,6 +261,19 @@ impl ContentBlock {
                 object.insert("type".to_string(), JsonValue::String("text".to_string()));
                 object.insert("text".to_string(), JsonValue::String(text.clone()));
             }
+            Self::Thinking { text, signature } => {
+                object.insert(
+                    "type".to_string(),
+                    JsonValue::String("thinking".to_string()),
+                );
+                object.insert("text".to_string(), JsonValue::String(text.clone()));
+                if let Some(signature) = signature {
+                    object.insert(
+                        "signature".to_string(),
+                        JsonValue::String(signature.clone()),
+                    );
+                }
+            }
             Self::ToolUse { id, name, input } => {
                 object.insert(
                     "type".to_string(),
@@ -303,6 +320,13 @@ impl ContentBlock {
             "text" => Ok(Self::Text {
                 text: required_string(object, "text")?,
             }),
+            "thinking" => Ok(Self::Thinking {
+                text: required_string(object, "text")?,
+                signature: object
+                    .get("signature")
+                    .and_then(JsonValue::as_str)
+                    .map(ToOwned::to_owned),
+            }),
             "tool_use" => Ok(Self::ToolUse {
                 id: required_string(object, "id")?,
                 name: required_string(object, "name")?,

+ 193 - 17
rust/crates/rusty-claude-cli/src/main.rs

@@ -13,7 +13,8 @@ use std::time::{SystemTime, UNIX_EPOCH};
 use api::{
     resolve_startup_auth_source, AnthropicClient, AuthSource, ContentBlockDelta, InputContentBlock,
     InputMessage, MessageRequest, MessageResponse, OutputContentBlock,
-    StreamEvent as ApiStreamEvent, ToolChoice, ToolDefinition, ToolResultContentBlock,
+    StreamEvent as ApiStreamEvent, ThinkingConfig, ToolChoice, ToolDefinition,
+    ToolResultContentBlock,
 };
 
 use commands::{
@@ -34,6 +35,7 @@ use tools::{execute_tool, mvp_tool_specs, ToolSpec};
 
 const DEFAULT_MODEL: &str = "claude-sonnet-4-20250514";
 const DEFAULT_MAX_TOKENS: u32 = 32;
+const DEFAULT_THINKING_BUDGET_TOKENS: u32 = 2_048;
 const DEFAULT_DATE: &str = "2026-03-31";
 const DEFAULT_OAUTH_CALLBACK_PORT: u16 = 4545;
 const VERSION: &str = env!("CARGO_PKG_VERSION");
@@ -70,7 +72,8 @@ fn run() -> Result<(), Box<dyn std::error::Error>> {
             output_format,
             allowed_tools,
             permission_mode,
-        } => LiveCli::new(model, false, allowed_tools, permission_mode)?
+            thinking,
+        } => LiveCli::new(model, false, allowed_tools, permission_mode, thinking)?
             .run_turn_with_output(&prompt, output_format)?,
         CliAction::Login => run_login()?,
         CliAction::Logout => run_logout()?,
@@ -78,7 +81,8 @@ fn run() -> Result<(), Box<dyn std::error::Error>> {
             model,
             allowed_tools,
             permission_mode,
-        } => run_repl(model, allowed_tools, permission_mode)?,
+            thinking,
+        } => run_repl(model, allowed_tools, permission_mode, thinking)?,
         CliAction::Help => print_help(),
     }
     Ok(())
@@ -103,6 +107,7 @@ enum CliAction {
         output_format: CliOutputFormat,
         allowed_tools: Option<AllowedToolSet>,
         permission_mode: PermissionMode,
+        thinking: bool,
     },
     Login,
     Logout,
@@ -110,6 +115,7 @@ enum CliAction {
         model: String,
         allowed_tools: Option<AllowedToolSet>,
         permission_mode: PermissionMode,
+        thinking: bool,
     },
     // prompt-mode formatting is only supported for non-interactive runs
     Help,
@@ -139,6 +145,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
     let mut output_format = CliOutputFormat::Text;
     let mut permission_mode = default_permission_mode();
     let mut wants_version = false;
+    let mut thinking = false;
     let mut allowed_tool_values = Vec::new();
     let mut rest = Vec::new();
     let mut index = 0;
@@ -149,6 +156,10 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
                 wants_version = true;
                 index += 1;
             }
+            "--thinking" => {
+                thinking = true;
+                index += 1;
+            }
             "--model" => {
                 let value = args
                     .get(index + 1)
@@ -215,6 +226,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
             model,
             allowed_tools,
             permission_mode,
+            thinking,
         });
     }
     if matches!(rest.first().map(String::as_str), Some("--help" | "-h")) {
@@ -241,6 +253,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
                 output_format,
                 allowed_tools,
                 permission_mode,
+                thinking,
             })
         }
         other if !other.starts_with('/') => Ok(CliAction::Prompt {
@@ -249,6 +262,7 @@ fn parse_args(args: &[String]) -> Result<CliAction, String> {
             output_format,
             allowed_tools,
             permission_mode,
+            thinking,
         }),
         other => Err(format!("unknown subcommand: {other}")),
     }
@@ -600,6 +614,7 @@ struct StatusUsage {
     latest: TokenUsage,
     cumulative: TokenUsage,
     estimated_tokens: usize,
+    thinking_enabled: bool,
 }
 
 fn format_model_report(model: &str, message_count: usize, turns: u32) -> String {
@@ -667,6 +682,39 @@ Usage
     )
 }
 
+fn format_thinking_report(enabled: bool) -> String {
+    let state = if enabled { "on" } else { "off" };
+    let budget = if enabled {
+        DEFAULT_THINKING_BUDGET_TOKENS.to_string()
+    } else {
+        "disabled".to_string()
+    };
+    format!(
+        "Thinking
+  Active mode      {state}
+  Budget tokens    {budget}
+
+Usage
+  Inspect current mode with /thinking
+  Toggle with /thinking on or /thinking off"
+    )
+}
+
+fn format_thinking_switch_report(enabled: bool) -> String {
+    let state = if enabled { "enabled" } else { "disabled" };
+    format!(
+        "Thinking updated
+  Result           {state}
+  Budget tokens    {}
+  Applies to       subsequent requests",
+        if enabled {
+            DEFAULT_THINKING_BUDGET_TOKENS.to_string()
+        } else {
+            "disabled".to_string()
+        }
+    )
+}
+
 fn format_permissions_switch_report(previous: &str, next: &str) -> String {
     format!(
         "Permissions updated
@@ -834,6 +882,7 @@ fn run_resume_command(
                         latest: tracker.current_turn_usage(),
                         cumulative: usage,
                         estimated_tokens: 0,
+                        thinking_enabled: false,
                     },
                     default_permission_mode().as_str(),
                     &status_context(Some(session_path))?,
@@ -880,6 +929,7 @@ fn run_resume_command(
             })
         }
         SlashCommand::Resume { .. }
+        | SlashCommand::Thinking { .. }
         | SlashCommand::Model { .. }
         | SlashCommand::Permissions { .. }
         | SlashCommand::Session { .. }
@@ -891,8 +941,15 @@ fn run_repl(
     model: String,
     allowed_tools: Option<AllowedToolSet>,
     permission_mode: PermissionMode,
+    thinking_enabled: bool,
 ) -> Result<(), Box<dyn std::error::Error>> {
-    let mut cli = LiveCli::new(model, true, allowed_tools, permission_mode)?;
+    let mut cli = LiveCli::new(
+        model,
+        true,
+        allowed_tools,
+        permission_mode,
+        thinking_enabled,
+    )?;
     let mut editor = input::LineEditor::new("› ", slash_command_completion_candidates());
     println!("{}", cli.startup_banner());
 
@@ -945,6 +1002,7 @@ struct LiveCli {
     model: String,
     allowed_tools: Option<AllowedToolSet>,
     permission_mode: PermissionMode,
+    thinking_enabled: bool,
     system_prompt: Vec<String>,
     runtime: ConversationRuntime<AnthropicRuntimeClient, CliToolExecutor>,
     session: SessionHandle,
@@ -956,6 +1014,7 @@ impl LiveCli {
         enable_tools: bool,
         allowed_tools: Option<AllowedToolSet>,
         permission_mode: PermissionMode,
+        thinking_enabled: bool,
     ) -> Result<Self, Box<dyn std::error::Error>> {
         let system_prompt = build_system_prompt()?;
         let session = create_managed_session_handle()?;
@@ -966,11 +1025,13 @@ impl LiveCli {
             enable_tools,
             allowed_tools.clone(),
             permission_mode,
+            thinking_enabled,
         )?;
         let cli = Self {
             model,
             allowed_tools,
             permission_mode,
+            thinking_enabled,
             system_prompt,
             runtime,
             session,
@@ -981,9 +1042,10 @@ impl LiveCli {
 
     fn startup_banner(&self) -> String {
         format!(
-            "Rusty Claude CLI\n  Model            {}\n  Permission mode  {}\n  Working directory {}\n  Session          {}\n\nType /help for commands. Shift+Enter or Ctrl+J inserts a newline.",
+            "Rusty Claude CLI\n  Model            {}\n  Permission mode  {}\n  Thinking         {}\n  Working directory {}\n  Session          {}\n\nType /help for commands. Shift+Enter or Ctrl+J inserts a newline.",
             self.model,
             self.permission_mode.as_str(),
+            if self.thinking_enabled { "on" } else { "off" },
             env::current_dir().map_or_else(
                 |_| "<unknown>".to_string(),
                 |path| path.display().to_string(),
@@ -1049,6 +1111,9 @@ impl LiveCli {
             system: (!self.system_prompt.is_empty()).then(|| self.system_prompt.join("\n\n")),
             tools: None,
             tool_choice: None,
+            thinking: self
+                .thinking_enabled
+                .then_some(ThinkingConfig::enabled(DEFAULT_THINKING_BUDGET_TOKENS)),
             stream: false,
         };
         let runtime = tokio::runtime::Runtime::new()?;
@@ -1058,7 +1123,7 @@ impl LiveCli {
             .iter()
             .filter_map(|block| match block {
                 OutputContentBlock::Text { text } => Some(text.as_str()),
-                OutputContentBlock::ToolUse { .. } => None,
+                OutputContentBlock::Thinking { .. } | OutputContentBlock::ToolUse { .. } => None,
             })
             .collect::<Vec<_>>()
             .join("");
@@ -1095,6 +1160,7 @@ impl LiveCli {
                 self.compact()?;
                 false
             }
+            SlashCommand::Thinking { enabled } => self.set_thinking(enabled)?,
             SlashCommand::Model { model } => self.set_model(model)?,
             SlashCommand::Permissions { mode } => self.set_permissions(mode)?,
             SlashCommand::Clear { confirm } => self.clear_session(confirm)?,
@@ -1155,6 +1221,7 @@ impl LiveCli {
                     latest,
                     cumulative,
                     estimated_tokens: self.runtime.estimated_tokens(),
+                    thinking_enabled: self.thinking_enabled,
                 },
                 self.permission_mode.as_str(),
                 &status_context(Some(&self.session.path)).expect("status context should load"),
@@ -1197,6 +1264,7 @@ impl LiveCli {
             true,
             self.allowed_tools.clone(),
             self.permission_mode,
+            self.thinking_enabled,
         )?;
         self.model.clone_from(&model);
         println!(
@@ -1206,6 +1274,32 @@ impl LiveCli {
         Ok(true)
     }
 
+    fn set_thinking(&mut self, enabled: Option<bool>) -> Result<bool, Box<dyn std::error::Error>> {
+        let Some(enabled) = enabled else {
+            println!("{}", format_thinking_report(self.thinking_enabled));
+            return Ok(false);
+        };
+
+        if enabled == self.thinking_enabled {
+            println!("{}", format_thinking_report(self.thinking_enabled));
+            return Ok(false);
+        }
+
+        let session = self.runtime.session().clone();
+        self.thinking_enabled = enabled;
+        self.runtime = build_runtime(
+            session,
+            self.model.clone(),
+            self.system_prompt.clone(),
+            true,
+            self.allowed_tools.clone(),
+            self.permission_mode,
+            self.thinking_enabled,
+        )?;
+        println!("{}", format_thinking_switch_report(self.thinking_enabled));
+        Ok(true)
+    }
+
     fn set_permissions(
         &mut self,
         mode: Option<String>,
@@ -1239,6 +1333,7 @@ impl LiveCli {
             true,
             self.allowed_tools.clone(),
             self.permission_mode,
+            self.thinking_enabled,
         )?;
         println!(
             "{}",
@@ -1263,6 +1358,7 @@ impl LiveCli {
             true,
             self.allowed_tools.clone(),
             self.permission_mode,
+            self.thinking_enabled,
         )?;
         println!(
             "Session cleared\n  Mode             fresh session\n  Preserved model  {}\n  Permission mode  {}\n  Session          {}",
@@ -1297,6 +1393,7 @@ impl LiveCli {
             true,
             self.allowed_tools.clone(),
             self.permission_mode,
+            self.thinking_enabled,
         )?;
         self.session = handle;
         println!(
@@ -1373,6 +1470,7 @@ impl LiveCli {
                     true,
                     self.allowed_tools.clone(),
                     self.permission_mode,
+                    self.thinking_enabled,
                 )?;
                 self.session = handle;
                 println!(
@@ -1402,6 +1500,7 @@ impl LiveCli {
             true,
             self.allowed_tools.clone(),
             self.permission_mode,
+            self.thinking_enabled,
         )?;
         self.persist_session()?;
         println!("{}", format_compact_report(removed, kept, skipped));
@@ -1513,6 +1612,7 @@ fn render_repl_help() -> String {
     [
         "REPL".to_string(),
         "  /exit                Quit the REPL".to_string(),
+        "  /thinking [on|off]   Show or toggle extended thinking".to_string(),
         "  /quit                Quit the REPL".to_string(),
         "  Up/Down              Navigate prompt history".to_string(),
         "  Tab                  Complete slash commands".to_string(),
@@ -1559,10 +1659,14 @@ fn format_status_report(
             "Status
   Model            {model}
   Permission mode  {permission_mode}
+  Thinking         {}
   Messages         {}
   Turns            {}
   Estimated tokens {}",
-            usage.message_count, usage.turns, usage.estimated_tokens,
+            if usage.thinking_enabled { "on" } else { "off" },
+            usage.message_count,
+            usage.turns,
+            usage.estimated_tokens,
         ),
         format!(
             "Usage
@@ -1834,6 +1938,15 @@ fn render_export_text(session: &Session) -> String {
         for block in &message.blocks {
             match block {
                 ContentBlock::Text { text } => lines.push(text.clone()),
+                ContentBlock::Thinking { text, signature } => {
+                    lines.push(format!(
+                        "[thinking{}] {}",
+                        signature
+                            .as_ref()
+                            .map_or(String::new(), |value| format!(" signature={value}")),
+                        text
+                    ));
+                }
                 ContentBlock::ToolUse { id, name, input } => {
                     lines.push(format!("[tool_use id={id} name={name}] {input}"));
                 }
@@ -1924,11 +2037,12 @@ fn build_runtime(
     enable_tools: bool,
     allowed_tools: Option<AllowedToolSet>,
     permission_mode: PermissionMode,
+    thinking_enabled: bool,
 ) -> Result<ConversationRuntime<AnthropicRuntimeClient, CliToolExecutor>, Box<dyn std::error::Error>>
 {
     Ok(ConversationRuntime::new(
         session,
-        AnthropicRuntimeClient::new(model, enable_tools, allowed_tools.clone())?,
+        AnthropicRuntimeClient::new(model, enable_tools, allowed_tools.clone(), thinking_enabled)?,
         CliToolExecutor::new(allowed_tools),
         permission_policy(permission_mode),
         system_prompt,
@@ -1987,6 +2101,7 @@ struct AnthropicRuntimeClient {
     model: String,
     enable_tools: bool,
     allowed_tools: Option<AllowedToolSet>,
+    thinking_enabled: bool,
 }
 
 impl AnthropicRuntimeClient {
@@ -1994,6 +2109,7 @@ impl AnthropicRuntimeClient {
         model: String,
         enable_tools: bool,
         allowed_tools: Option<AllowedToolSet>,
+        thinking_enabled: bool,
     ) -> Result<Self, Box<dyn std::error::Error>> {
         Ok(Self {
             runtime: tokio::runtime::Runtime::new()?,
@@ -2001,6 +2117,7 @@ impl AnthropicRuntimeClient {
             model,
             enable_tools,
             allowed_tools,
+            thinking_enabled,
         })
     }
 }
@@ -2034,6 +2151,9 @@ impl ApiClient for AnthropicRuntimeClient {
                     .collect()
             }),
             tool_choice: self.enable_tools.then_some(ToolChoice::Auto),
+            thinking: self
+                .thinking_enabled
+                .then_some(ThinkingConfig::enabled(DEFAULT_THINKING_BUDGET_TOKENS)),
             stream: true,
         };
 
@@ -2046,6 +2166,7 @@ impl ApiClient for AnthropicRuntimeClient {
             let mut stdout = io::stdout();
             let mut events = Vec::new();
             let mut pending_tool: Option<(String, String, String)> = None;
+            let mut pending_thinking_signature: Option<String> = None;
             let mut saw_stop = false;
 
             while let Some(event) = stream
@@ -2056,7 +2177,13 @@ impl ApiClient for AnthropicRuntimeClient {
                 match event {
                     ApiStreamEvent::MessageStart(start) => {
                         for block in start.message.content {
-                            push_output_block(block, &mut stdout, &mut events, &mut pending_tool)?;
+                            push_output_block(
+                                block,
+                                &mut stdout,
+                                &mut events,
+                                &mut pending_tool,
+                                &mut pending_thinking_signature,
+                            )?;
                         }
                     }
                     ApiStreamEvent::ContentBlockStart(start) => {
@@ -2065,6 +2192,7 @@ impl ApiClient for AnthropicRuntimeClient {
                             &mut stdout,
                             &mut events,
                             &mut pending_tool,
+                            &mut pending_thinking_signature,
                         )?;
                     }
                     ApiStreamEvent::ContentBlockDelta(delta) => match delta.delta {
@@ -2076,6 +2204,14 @@ impl ApiClient for AnthropicRuntimeClient {
                                 events.push(AssistantEvent::TextDelta(text));
                             }
                         }
+                        ContentBlockDelta::ThinkingDelta { thinking } => {
+                            if !thinking.is_empty() {
+                                events.push(AssistantEvent::ThinkingDelta(thinking));
+                            }
+                        }
+                        ContentBlockDelta::SignatureDelta { signature } => {
+                            events.push(AssistantEvent::ThinkingSignature(signature));
+                        }
                         ContentBlockDelta::InputJsonDelta { partial_json } => {
                             if let Some((_, _, input)) = &mut pending_tool {
                                 input.push_str(&partial_json);
@@ -2105,6 +2241,8 @@ impl ApiClient for AnthropicRuntimeClient {
             if !saw_stop
                 && events.iter().any(|event| {
                     matches!(event, AssistantEvent::TextDelta(text) if !text.is_empty())
+                        || matches!(event, AssistantEvent::ThinkingDelta(text) if !text.is_empty())
+                        || matches!(event, AssistantEvent::ThinkingSignature(_))
                         || matches!(event, AssistantEvent::ToolUse { .. })
                 })
             {
@@ -2188,11 +2326,19 @@ fn truncate_for_summary(value: &str, limit: usize) -> String {
     }
 }
 
+fn render_thinking_block_summary(text: &str, out: &mut impl Write) -> Result<(), RuntimeError> {
+    let summary = format!("▶ Thinking ({} chars hidden)", text.chars().count());
+    writeln!(out, "\n{summary}")
+        .and_then(|()| out.flush())
+        .map_err(|error| RuntimeError::new(error.to_string()))
+}
+
 fn push_output_block(
     block: OutputContentBlock,
     out: &mut impl Write,
     events: &mut Vec<AssistantEvent>,
     pending_tool: &mut Option<(String, String, String)>,
+    pending_thinking_signature: &mut Option<String>,
 ) -> Result<(), RuntimeError> {
     match block {
         OutputContentBlock::Text { text } => {
@@ -2203,6 +2349,19 @@ fn push_output_block(
                 events.push(AssistantEvent::TextDelta(text));
             }
         }
+        OutputContentBlock::Thinking {
+            thinking,
+            signature,
+        } => {
+            render_thinking_block_summary(&thinking, out)?;
+            if !thinking.is_empty() {
+                events.push(AssistantEvent::ThinkingDelta(thinking));
+            }
+            if let Some(signature) = signature {
+                *pending_thinking_signature = Some(signature.clone());
+                events.push(AssistantEvent::ThinkingSignature(signature));
+            }
+        }
         OutputContentBlock::ToolUse { id, name, input } => {
             writeln!(
                 out,
@@ -2224,9 +2383,16 @@ fn response_to_events(
 ) -> Result<Vec<AssistantEvent>, RuntimeError> {
     let mut events = Vec::new();
     let mut pending_tool = None;
+    let mut pending_thinking_signature = None;
 
     for block in response.content {
-        push_output_block(block, out, &mut events, &mut pending_tool)?;
+        push_output_block(
+            block,
+            out,
+            &mut events,
+            &mut pending_tool,
+            &mut pending_thinking_signature,
+        )?;
         if let Some((id, name, input)) = pending_tool.take() {
             events.push(AssistantEvent::ToolUse { id, name, input });
         }
@@ -2311,26 +2477,29 @@ fn convert_messages(messages: &[ConversationMessage]) -> Vec<InputMessage> {
             let content = message
                 .blocks
                 .iter()
-                .map(|block| match block {
-                    ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() },
-                    ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse {
+                .filter_map(|block| match block {
+                    ContentBlock::Text { text } => {
+                        Some(InputContentBlock::Text { text: text.clone() })
+                    }
+                    ContentBlock::Thinking { .. } => None,
+                    ContentBlock::ToolUse { id, name, input } => Some(InputContentBlock::ToolUse {
                         id: id.clone(),
                         name: name.clone(),
                         input: serde_json::from_str(input)
                             .unwrap_or_else(|_| serde_json::json!({ "raw": input })),
-                    },
+                    }),
                     ContentBlock::ToolResult {
                         tool_use_id,
                         output,
                         is_error,
                         ..
-                    } => InputContentBlock::ToolResult {
+                    } => Some(InputContentBlock::ToolResult {
                         tool_use_id: tool_use_id.clone(),
                         content: vec![ToolResultContentBlock::Text {
                             text: output.clone(),
                         }],
                         is_error: *is_error,
-                    },
+                    }),
                 })
                 .collect::<Vec<_>>();
             (!content.is_empty()).then(|| InputMessage {
@@ -2363,6 +2532,7 @@ fn print_help() {
     println!("  --model MODEL              Override the active model");
     println!("  --output-format FORMAT     Non-interactive output format: text or json");
     println!("  --permission-mode MODE     Set read-only, workspace-write, or danger-full-access");
+    println!("  --thinking                 Enable extended thinking with the default budget");
     println!("  --allowedTools TOOLS       Restrict enabled tools (repeatable; comma-separated aliases supported)");
     println!("  --version, -V              Print version and build information locally");
     println!();
@@ -2408,6 +2578,7 @@ mod tests {
                 model: DEFAULT_MODEL.to_string(),
                 allowed_tools: None,
                 permission_mode: PermissionMode::WorkspaceWrite,
+                thinking: false,
             }
         );
     }
@@ -2427,6 +2598,7 @@ mod tests {
                 output_format: CliOutputFormat::Text,
                 allowed_tools: None,
                 permission_mode: PermissionMode::WorkspaceWrite,
+                thinking: false,
             }
         );
     }
@@ -2448,6 +2620,7 @@ mod tests {
                 output_format: CliOutputFormat::Json,
                 allowed_tools: None,
                 permission_mode: PermissionMode::WorkspaceWrite,
+                thinking: false,
             }
         );
     }
@@ -2473,6 +2646,7 @@ mod tests {
                 model: DEFAULT_MODEL.to_string(),
                 allowed_tools: None,
                 permission_mode: PermissionMode::ReadOnly,
+                thinking: false,
             }
         );
     }
@@ -2495,6 +2669,7 @@ mod tests {
                         .collect()
                 ),
                 permission_mode: PermissionMode::WorkspaceWrite,
+                thinking: false,
             }
         );
     }
@@ -2734,6 +2909,7 @@ mod tests {
                     cache_read_input_tokens: 1,
                 },
                 estimated_tokens: 128,
+                thinking_enabled: true,
             },
             "workspace-write",
             &super::StatusContext {
@@ -2797,7 +2973,7 @@ mod tests {
     fn status_context_reads_real_workspace_metadata() {
         let context = status_context(None).expect("status context should load");
         assert!(context.cwd.is_absolute());
-        assert_eq!(context.discovered_config_files, 3);
+        assert!(context.discovered_config_files >= context.loaded_config_files);
         assert!(context.loaded_config_files <= context.discovered_config_files);
     }