소스 검색

Initial commit: Claude Code v2.1.88 source extracted from npm source maps

sachin1801 3일 전
커밋
9687ada023
100개의 변경된 파일, 34500개의 추가작업 그리고 0개의 파일을 삭제
  1. 10 0
      .gitignore
  2. 427 0
      CLAUDE.md
  3. 21 0
      LICENSE
  4. 92 0
      README.md
  5. 38 0
      build.ts
  6. 141 0
      bun.lock
  7. 118 0
      package.json
  8. 35 0
      scripts/generate-sdk-types.ts
  9. 3 0
      shims/bun-bundle.d.ts
  10. 5 0
      shims/bun-bundle.ts
  11. 7 0
      shims/globals.d.ts
  12. 1295 0
      src/QueryEngine.ts
  13. 125 0
      src/Task.ts
  14. 792 0
      src/Tool.ts
  15. 5 0
      src/assistant/AssistantSessionChooser.tsx
  16. 87 0
      src/assistant/sessionHistory.ts
  17. 1758 0
      src/bootstrap/state.ts
  18. 539 0
      src/bridge/bridgeApi.ts
  19. 48 0
      src/bridge/bridgeConfig.ts
  20. 135 0
      src/bridge/bridgeDebug.ts
  21. 202 0
      src/bridge/bridgeEnabled.ts
  22. 2999 0
      src/bridge/bridgeMain.ts
  23. 461 0
      src/bridge/bridgeMessaging.ts
  24. 43 0
      src/bridge/bridgePermissionCallbacks.ts
  25. 210 0
      src/bridge/bridgePointer.ts
  26. 163 0
      src/bridge/bridgeStatusUtil.ts
  27. 530 0
      src/bridge/bridgeUI.ts
  28. 56 0
      src/bridge/capacityWake.ts
  29. 168 0
      src/bridge/codeSessionApi.ts
  30. 384 0
      src/bridge/createSession.ts
  31. 141 0
      src/bridge/debugUtils.ts
  32. 165 0
      src/bridge/envLessBridgeConfig.ts
  33. 71 0
      src/bridge/flushGate.ts
  34. 175 0
      src/bridge/inboundAttachments.ts
  35. 80 0
      src/bridge/inboundMessages.ts
  36. 569 0
      src/bridge/initReplBridge.ts
  37. 256 0
      src/bridge/jwtUtils.ts
  38. 110 0
      src/bridge/pollConfig.ts
  39. 82 0
      src/bridge/pollConfigDefaults.ts
  40. 1008 0
      src/bridge/remoteBridgeCore.ts
  41. 2406 0
      src/bridge/replBridge.ts
  42. 36 0
      src/bridge/replBridgeHandle.ts
  43. 370 0
      src/bridge/replBridgeTransport.ts
  44. 57 0
      src/bridge/sessionIdCompat.ts
  45. 550 0
      src/bridge/sessionRunner.ts
  46. 210 0
      src/bridge/trustedDevice.ts
  47. 262 0
      src/bridge/types.ts
  48. 127 0
      src/bridge/workSecret.ts
  49. 370 0
      src/buddy/CompanionSprite.tsx
  50. 133 0
      src/buddy/companion.ts
  51. 36 0
      src/buddy/prompt.ts
  52. 514 0
      src/buddy/sprites.ts
  53. 148 0
      src/buddy/types.ts
  54. 97 0
      src/buddy/useBuddyNotification.tsx
  55. 31 0
      src/cli/exit.ts
  56. 70 0
      src/cli/handlers/agents.ts
  57. 330 0
      src/cli/handlers/auth.ts
  58. 170 0
      src/cli/handlers/autoMode.ts
  59. 361 0
      src/cli/handlers/mcp.tsx
  60. 878 0
      src/cli/handlers/plugins.ts
  61. 109 0
      src/cli/handlers/util.tsx
  62. 32 0
      src/cli/ndjsonSafeStringify.ts
  63. 5594 0
      src/cli/print.ts
  64. 255 0
      src/cli/remoteIO.ts
  65. 859 0
      src/cli/structuredIO.ts
  66. 282 0
      src/cli/transports/HybridTransport.ts
  67. 711 0
      src/cli/transports/SSETransport.ts
  68. 275 0
      src/cli/transports/SerialBatchEventUploader.ts
  69. 800 0
      src/cli/transports/WebSocketTransport.ts
  70. 131 0
      src/cli/transports/WorkerStateUploader.ts
  71. 998 0
      src/cli/transports/ccrClient.ts
  72. 45 0
      src/cli/transports/transportUtils.ts
  73. 422 0
      src/cli/update.ts
  74. 754 0
      src/commands.ts
  75. 125 0
      src/commands/add-dir/add-dir.tsx
  76. 11 0
      src/commands/add-dir/index.ts
  77. 110 0
      src/commands/add-dir/validation.ts
  78. 109 0
      src/commands/advisor.ts
  79. 5 0
      src/commands/agents-platform/index.ts
  80. 12 0
      src/commands/agents/agents.tsx
  81. 10 0
      src/commands/agents/index.ts
  82. 1 0
      src/commands/ant-trace/index.js
  83. 8 0
      src/commands/assistant/assistant.tsx
  84. 1 0
      src/commands/autofix-pr/index.js
  85. 1 0
      src/commands/backfill-sessions/index.js
  86. 296 0
      src/commands/branch/branch.ts
  87. 14 0
      src/commands/branch/index.ts
  88. 1 0
      src/commands/break-cache/index.js
  89. 200 0
      src/commands/bridge-kick.ts
  90. 508 0
      src/commands/bridge/bridge.tsx
  91. 26 0
      src/commands/bridge/index.ts
  92. 130 0
      src/commands/brief.ts
  93. 242 0
      src/commands/btw/btw.tsx
  94. 13 0
      src/commands/btw/index.ts
  95. 1 0
      src/commands/bughunter/index.js
  96. 284 0
      src/commands/chrome/chrome.tsx
  97. 13 0
      src/commands/chrome/index.ts
  98. 144 0
      src/commands/clear/caches.ts
  99. 7 0
      src/commands/clear/clear.ts
  100. 251 0
      src/commands/clear/conversation.ts

+ 10 - 0
.gitignore

@@ -0,0 +1,10 @@
+node_modules/
+dist/
+*.js.map
+.DS_Store
+bun.lockb
+
+# Large extracted files
+cli.js.map
+sourcemap-extract.tar.gz
+vendor/

+ 427 - 0
CLAUDE.md

@@ -0,0 +1,427 @@
+# Claude Code — Project Map
+
+> **This file must be kept up to date.** Whenever you add files, create stubs, extract new sources, or change the build — update the relevant section here. This is the single source of truth for what's in this repo and how it works.
+
+## How to Build & Run
+
+```bash
+bun install          # install dependencies
+bun run build        # bundles to dist/cli.js (~23MB)
+bun dist/cli.js      # run it
+```
+
+## Using with Agent SDK (in Tauri or other apps)
+
+```typescript
+import { query } from "@anthropic-ai/claude-agent-sdk";
+const response = query({
+  prompt: "your prompt",
+  options: {
+    pathToClaudeCodeExecutable: "/path/to/claude-code/dist/cli.js",
+  },
+});
+```
+
+## Project Structure
+
+```
+claude-code/
+├── dist/                          # Build output (gitignored)
+│   └── cli.js                     # Bundled CLI (23MB, single file)
+│
+├── src/                           # Main source (1,929 files) — leaked from Anthropic
+│   ├── main.tsx                   # CLI entrypoint — Commander.js parser, all flags
+│   ├── entrypoints/
+│   │   ├── cli.tsx                # Bootstrap — version check, fast-paths
+│   │   ├── init.ts                # Initialization — telemetry, config, auth
+│   │   ├── mcp.ts                 # MCP server entrypoint
+│   │   └── sdk/                   # Agent SDK types
+│   │       ├── coreSchemas.ts     # Zod schemas (source of truth for types)
+│   │       ├── coreTypes.ts       # Re-exports generated types
+│   │       ├── coreTypes.generated.ts  # [GENERATED] from coreSchemas.ts
+│   │       ├── runtimeTypes.ts    # [STUB] SDK runtime types
+│   │       ├── toolTypes.ts       # [STUB] SDK tool types
+│   │       └── settingsTypes.generated.ts  # [STUB] Settings types
+│   │
+│   ├── commands/                  # Slash commands (~50)
+│   │   ├── agents-platform/       # [STUB] Ant-only
+│   │   └── assistant/             # [STUB] Assistant wizard
+│   │
+│   ├── tools/                     # Agent tools (~40)
+│   │   ├── BashTool/              # Shell execution
+│   │   ├── FileEditTool/          # File editing
+│   │   ├── FileReadTool/          # File reading
+│   │   ├── FileWriteTool/         # File writing
+│   │   ├── GlobTool/              # File search
+│   │   ├── GrepTool/              # Content search
+│   │   ├── AgentTool/             # Subagent spawning
+│   │   ├── WebFetchTool/          # HTTP fetching
+│   │   ├── TungstenTool/          # [STUB] Ant-only debug tool
+│   │   ├── REPLTool/              # [STUB] Ant-only REPL
+│   │   ├── SuggestBackgroundPRTool/ # [STUB] Ant-only
+│   │   ├── VerifyPlanExecutionTool/ # [STUB] Env-gated
+│   │   └── WorkflowTool/          # [STUB] Feature-gated (WORKFLOW_SCRIPTS)
+│   │
+│   ├── components/                # React (Ink) UI components (~140)
+│   │   ├── agents/
+│   │   │   └── SnapshotUpdateDialog.tsx  # [STUB]
+│   │   ├── design-system/         # Theme, colors, tokens
+│   │   ├── LogoV2/                # Welcome screen, release notes
+│   │   ├── Message.tsx            # Message rendering
+│   │   ├── StructuredDiff/        # Syntax-highlighted diffs
+│   │   └── permissions/           # Permission approval dialogs
+│   │
+│   ├── screens/
+│   │   └── REPL.tsx               # Main interactive screen (2800+ lines)
+│   │
+│   ├── ink/                       # Custom Ink fork (terminal React renderer)
+│   │   ├── layout/                # Flexbox layout engine
+│   │   ├── components/            # Box, Text, ScrollBox, Button, etc.
+│   │   ├── hooks/                 # useInput, useStdin, useSelection, etc.
+│   │   ├── events/                # Click, keyboard, focus events
+│   │   ├── termio/                # Terminal I/O, ANSI parsing
+│   │   └── reconciler.ts          # React reconciler
+│   │
+│   ├── services/
+│   │   ├── api/                   # Anthropic API client, streaming, errors
+│   │   ├── mcp/                   # MCP client/server implementation
+│   │   ├── oauth/                 # OAuth flow
+│   │   ├── analytics/             # Telemetry, GrowthBook, DataDog
+│   │   ├── lsp/                   # Language Server Protocol
+│   │   ├── compact/               # Context compaction
+│   │   │   ├── snipCompact.ts     # [STUB] Feature-gated (HISTORY_SNIP)
+│   │   │   └── cachedMicrocompact.ts  # [STUB] Feature-gated
+│   │   ├── contextCollapse/       # [STUB] Not in leak
+│   │   ├── plugins/               # Plugin installation & management
+│   │   └── tools/                 # Tool execution (StreamingToolExecutor)
+│   │
+│   ├── native-ts/                 # Pure TypeScript ports of native modules
+│   │   ├── yoga-layout/           # Flexbox engine (port of Meta's Yoga)
+│   │   ├── color-diff/            # Syntax-highlighted diffs (port of Rust module)
+│   │   └── file-index/            # Fuzzy file search (port of nucleo)
+│   │
+│   ├── constants/
+│   │   ├── prompts.ts             # FULL system prompt — the actual instructions sent to Claude
+│   │   ├── oauth.ts               # OAuth config (client IDs, endpoints)
+│   │   └── product.ts             # Product constants
+│   │
+│   ├── utils/
+│   │   ├── autoUpdater.ts         # Version check [PATCHED — remote check disabled]
+│   │   ├── computerUse/           # Computer use integration layer
+│   │   │   └── executor.ts        # 22KB CLI executor — wraps Swift/Rust native modules
+│   │   ├── claudeInChrome/        # Chrome integration layer
+│   │   ├── sandbox/               # Sandbox adapter
+│   │   ├── settings/              # Settings system
+│   │   ├── model/                 # Model selection, aliases
+│   │   ├── auth.ts                # Authentication
+│   │   ├── hooks/                 # Hook execution engine (155 files total)
+│   │   │   ├── AsyncHookRegistry.ts    # Hook registration & lifecycle
+│   │   │   ├── execAgentHook.ts        # Agent-spawning hooks
+│   │   │   ├── execHttpHook.ts         # HTTP webhook hooks
+│   │   │   ├── execPromptHook.ts       # Prompt-based hooks
+│   │   │   ├── hookEvents.ts           # All hook event types
+│   │   │   └── hooksConfigManager.ts   # settings.json hook config
+│   │   ├── plugins/               # Plugin system (65+ files)
+│   │   │   ├── pluginLoader.ts         # Loads plugins from directories
+│   │   │   ├── loadPluginAgents.ts     # Agent definitions from plugins
+│   │   │   ├── loadPluginCommands.ts   # Slash commands from plugins
+│   │   │   ├── loadPluginHooks.ts      # Hooks from plugins
+│   │   │   ├── schemas.ts             # plugin.json schema validation
+│   │   │   └── marketplaceManager.ts  # Marketplace browsing/install
+│   │   ├── permissions/           # Permission & auto-mode classifier
+│   │   │   ├── yoloClassifier.ts  # 52KB — auto-mode LLM classifier logic
+│   │   │   ├── bashClassifier.ts  # Bash-specific classifier
+│   │   │   ├── classifierDecision.ts  # Safe tool allowlist
+│   │   │   ├── autoModeState.ts   # Auto-mode state management
+│   │   │   └── yolo-classifier-prompts/  # [MISSING] DCE'd by feature flag
+│   │   ├── protectedNamespace.ts  # [STUB] Ant-only
+│   │   └── filePersistence/
+│   │       └── types.ts           # [STUB]
+│   │
+│   ├── skills/                    # Built-in skills (23 files)
+│   │   ├── bundledSkills.ts       # Skill registry
+│   │   ├── loadSkillsDir.ts       # Load skills from directories
+│   │   └── bundled/               # 16 bundled skills (batch, claudeApi, debug, loop, etc.)
+│   │
+│   ├── assistant/
+│   │   ├── sessionHistory.ts      # Session history
+│   │   └── AssistantSessionChooser.tsx  # [STUB]
+│   │
+│   ├── vim/                       # Vim mode (motions, operators, text objects)
+│   ├── state/                     # App state management
+│   ├── hooks/                     # React hooks
+│   ├── types/
+│   │   └── connectorText.ts       # [STUB]
+│   ├── bridge/                    # Cloud session bridging
+│   ├── coordinator/               # Multi-agent coordinator
+│   ├── plugins/                   # Plugin system entry
+│   ├── bootstrap/                 # Bootstrap/startup state
+│   └── voice/                     # Voice mode
+│
+├── stubs/                         # Extracted proprietary source code
+│   ├── @ant/                      # Private Anthropic packages (28 files)
+│   │   ├── computer-use-mcp/      # Computer Use MCP server
+│   │   │   └── src/
+│   │   │       ├── index.ts       # Exports
+│   │   │       ├── toolCalls.ts   # 137KB — full tool implementation
+│   │   │       ├── tools.ts       # Tool definitions
+│   │   │       ├── mcpServer.ts   # MCP server setup
+│   │   │       ├── types.ts       # All CU types
+│   │   │       ├── deniedApps.ts  # App blocklist
+│   │   │       ├── keyBlocklist.ts # Key combo blocklist
+│   │   │       ├── sentinelApps.ts # Sentinel app detection
+│   │   │       ├── imageResize.ts # Screenshot resizing
+│   │   │       ├── pixelCompare.ts # Click target validation
+│   │   │       ├── executor.ts    # [STUB] Native Swift/Rust bridge interface
+│   │   │       └── subGates.ts    # [STUB] Permission sub-gates
+│   │   │
+│   │   ├── claude-for-chrome-mcp/ # Chrome automation (8 source files)
+│   │   │   └── src/
+│   │   │       ├── index.ts       # Exports
+│   │   │       ├── bridgeClient.ts # 37KB — Chrome bridge via WebSocket
+│   │   │       ├── browserTools.ts # 25KB — browser tool definitions
+│   │   │       ├── mcpServer.ts   # MCP server
+│   │   │       ├── mcpSocketClient.ts # WebSocket client
+│   │   │       ├── mcpSocketPool.ts   # Connection pooling
+│   │   │       ├── toolCalls.ts   # Tool call handling
+│   │   │       └── types.ts       # Types
+│   │   │
+│   │   ├── computer-use-swift/    # macOS native bridge
+│   │   │   └── js/index.js        # JS loader for Swift binary
+│   │   │
+│   │   └── computer-use-input/    # Input device bridge
+│   │       └── js/index.js        # JS loader for Rust binary
+│   │
+│   ├── @anthropic-ai/            # Anthropic SDK sources (105+ files)
+│   │   ├── sandbox-runtime/       # Sandbox system (17 files, 180KB)
+│   │   │   ├── dist/
+│   │   │   │   ├── sandbox/
+│   │   │   │   │   ├── sandbox-manager.js    # 31KB — core orchestrator
+│   │   │   │   │   ├── sandbox-config.js     # Config/schema
+│   │   │   │   │   ├── sandbox-schemas.js    # Zod schemas
+│   │   │   │   │   ├── parent-proxy.js       # 17KB — parent process proxy
+│   │   │   │   │   ├── macos-sandbox-utils.js # 28KB — macOS Seatbelt profiles
+│   │   │   │   │   ├── linux-sandbox-utils.js # 42KB — Linux namespaces + seccomp
+│   │   │   │   │   ├── generate-seccomp-filter.js # 12KB — raw BPF bytecode gen
+│   │   │   │   │   ├── http-proxy.js         # HTTP egress proxy
+│   │   │   │   │   ├── socks-proxy.js        # SOCKS proxy
+│   │   │   │   │   └── sandbox-violation-store.js
+│   │   │   │   └── utils/
+│   │   │   │       └── config-loader.js      # Config file loader
+│   │   │   └── vendor/
+│   │   │       ├── seccomp-src/
+│   │   │       │   ├── apply-seccomp.c       # C — seccomp BPF loader
+│   │   │       │   └── seccomp-unix-block.c  # C — Unix socket blocker
+│   │   │       └── seccomp/                  # Precompiled binaries (arm64 + x64)
+│   │   │
+│   │   ├── mcpb/                  # MCP Bundle tools (11 files, 75KB)
+│   │   │   └── dist/
+│   │   │       ├── cli/           # pack.js, unpack.js, init.js (26KB scaffolder)
+│   │   │       ├── node/          # files.js, sign.js (12KB), validate.js
+│   │   │       └── shared/        # config.js, log.js
+│   │   │
+│   │   ├── sdk/                   # Anthropic SDK source (40+ files, 232KB)
+│   │   │   ├── client.mjs         # 28KB — main API client
+│   │   │   ├── resources/         # API resources (messages, models, batches, skills)
+│   │   │   ├── lib/
+│   │   │   │   ├── MessageStream.mjs     # 29KB — response streaming
+│   │   │   │   ├── BetaMessageStream.mjs # 31KB — beta streaming
+│   │   │   │   ├── tools/BetaToolRunner.mjs # 18KB — tool use loop
+│   │   │   │   ├── tools/CompactionControl.mjs # Context compaction
+│   │   │   │   └── parser.mjs           # Partial JSON streaming parser
+│   │   │   └── internal/          # Headers, auth, request handling
+│   │   │
+│   │   ├── bedrock-sdk/           # AWS Bedrock (12 files, 36KB)
+│   │   │   ├── client.mjs         # Bedrock API client
+│   │   │   └── core/auth.mjs      # SigV4 signing
+│   │   │
+│   │   ├── vertex-sdk/            # GCP Vertex (7 files, 13KB)
+│   │   │   └── client.mjs         # Vertex AI client with Google auth
+│   │   │
+│   │   └── foundry-sdk/           # Foundry (8 files, 16KB)
+│   │       └── client.mjs         # Foundry client with custom auth
+│   │
+│   └── downloads/                 # Additional packages from npm + GCS
+│       ├── tokenizer/             # Claude's BPE tokenizer
+│       │   ├── claude.json        # 680KB — full vocabulary (64,739 tokens)
+│       │   ├── index.ts           # Tokenizer implementation
+│       │   └── tests/             # Test suite
+│       │
+│       ├── claude-trace/          # OTEL trace viewer for Claude sessions
+│       │   ├── dist/server.cjs    # 838KB — trace server
+│       │   └── viewer/dist/       # Web UI (HTML + JS + CSS)
+│       │
+│       ├── claude-agent-sdk/      # Agent SDK package
+│       │   ├── sdk.mjs            # Main SDK — spawns CLI as subprocess
+│       │   ├── sdk.d.ts           # Full type definitions
+│       │   ├── bridge.mjs         # Session bridge protocol
+│       │   ├── browser-sdk.js     # Browser-compatible SDK
+│       │   ├── embed.js           # Embedding helpers
+│       │   └── manifest.json      # SDK manifest
+│       │
+│       └── official-plugins/      # Official plugin marketplace (from GCS bucket)
+│           └── marketplaces/claude-plugins-official/
+│               ├── plugins/       # 32 official plugins
+│               │   ├── feature-dev/       # Feature dev with agents
+│               │   ├── code-review/       # Code review
+│               │   ├── plugin-dev/        # Plugin development tools
+│               │   ├── mcp-server-dev/    # MCP server builder
+│               │   ├── claude-code-setup/ # Automation recommender
+│               │   ├── claude-md-management/ # CLAUDE.md improver
+│               │   ├── skill-creator/     # Skill creation
+│               │   ├── frontend-design/   # Frontend design generation
+│               │   ├── security-guidance/ # Security review
+│               │   ├── agent-sdk-dev/     # Agent SDK tools
+│               │   ├── hookify/           # Hook creation
+│               │   ├── commit-commands/   # Git commit helpers
+│               │   ├── playground/        # Plugin playground
+│               │   ├── ralph-loop/        # Looping agent
+│               │   ├── math-olympiad/     # Math problem solving
+│               │   ├── typescript-lsp/    # TypeScript LSP
+│               │   ├── pyright-lsp/       # Python LSP
+│               │   ├── rust-analyzer-lsp/ # Rust LSP
+│               │   ├── gopls-lsp/         # Go LSP
+│               │   └── ... (13 more LSP + output style plugins)
+│               └── external_plugins/  # 3rd-party plugins (asana, context7, discord)
+│
+├── shims/                         # Build-time shims
+│   ├── bun-bundle.ts              # Runtime shim for feature() — returns false
+│   ├── bun-bundle.d.ts            # Type declaration
+│   └── globals.d.ts               # MACRO.* type declarations
+│
+├── scripts/
+│   └── generate-sdk-types.ts      # Generates coreTypes.generated.ts from Zod schemas
+│
+├── vendor/                        # Native binaries from npm package (gitignored)
+│   ├── ripgrep/                   # rg binary (arm64/x64 for darwin/linux/win32)
+│   └── audio-capture/             # Voice capture native addon (all platforms)
+│
+├── build.ts                       # Bun build script
+├── package.json                   # Dependencies & scripts
+├── tsconfig.json                  # TypeScript config
+├── bun.lock                       # Bun lockfile
+├── .gitignore
+├── LICENSE                        # MIT
+├── README.md
+│
+├── cli.js.map                     # Original 57MB source map (gitignored, saved locally)
+└── sourcemap-extract.tar.gz       # Full extraction archive (gitignored, saved locally)
+```
+
+## What's Patched
+
+- `src/utils/autoUpdater.ts` — remote version check disabled (line 72: early return)
+- `build.ts` — MACRO.VERSION set to `2.1.88`, all feature() flags return false
+
+## What's Stubbed (marked [STUB] above)
+
+Files that exist but contain minimal placeholder code because:
+1. **Not in leak** — source files excluded from the original zip
+2. **Native bindings** — Rust/Swift code can't be in a source map (executor.ts, subGates.ts)
+3. **Generated files** — were generated by build scripts (coreTypes.generated.ts — we regenerated this)
+4. **Ant-only** — internal Anthropic tools gated by `USER_TYPE === 'ant'`
+
+## Feature Flags (all disabled)
+
+The source uses `feature('FLAG_NAME')` from `bun:bundle` for dead code elimination.
+Our shim returns `false` for all flags. Known flags:
+VOICE_MODE, COORDINATOR_MODE, KAIROS, PROACTIVE, ULTRAPLAN, BRIDGE_MODE,
+BG_SESSIONS, WORKFLOW_SCRIPTS, TRANSCRIPT_CLASSIFIER, TOKEN_BUDGET,
+HISTORY_SNIP, BUDDY, TEAMMEM, AGENT_TRIGGERS, WEB_BROWSER_TOOL,
+MESSAGE_ACTIONS, HOOK_PROMPTS, CACHED_MICROCOMPACT, CHICAGO_MCP,
+ABLATION_BASELINE, DUMP_SYSTEM_PROMPT
+
+## What Works vs What Doesn't
+
+### Fully Working
+- All standard tools (Bash, Edit, Read, Write, Grep, Glob, WebFetch, WebSearch, Agent)
+- Terminal UI (full React/Ink REPL with custom flexbox layout)
+- OAuth authentication (same flow as official)
+- MCP server support
+- Slash commands (/help, /clear, /compact, /resume, etc.)
+- Session persistence and resume
+- Plugin system (full source: loading, agents, commands, hooks, marketplace)
+- Hook system (full source: async registry, agent/HTTP/prompt hooks, SSRF guard)
+- Skill system (full source: 16 bundled skills, skill loader, MCP skill builders)
+- Vim mode
+- Sandbox mode (real @anthropic-ai/sandbox-runtime from npm)
+- AWS Bedrock / GCP Vertex / Foundry backends (real SDKs from npm)
+- Agent SDK integration (set `pathToClaudeCodeExecutable` to `dist/cli.js`)
+- System prompt (full source in src/constants/prompts.ts)
+
+### Not Working
+- **Computer Use** — full logic extracted (137KB toolCalls.ts) but needs native
+  Swift/Rust binaries for screen capture and input. Could be rebuilt using macOS
+  system commands (screencapture, osascript, pbcopy/pbpaste). The 22KB executor
+  wrapper (src/utils/computerUse/executor.ts) shows the exact native API surface.
+- **Auto-mode classifier prompts** — the classifier logic is all there (52KB
+  yoloClassifier.ts) but the 3 prompt .txt files were DCE'd by the
+  TRANSCRIPT_CLASSIFIER feature flag. The code shows the expected format
+  (allow/soft_deny/environment rules with XML tags).
+- **Feature-flagged features** — voice, coordinator, ultraplan, etc. All disabled
+  via feature() shim. The source is there but many depend on backend infra.
+- **Ant-only tools** — TungstenTool, REPLTool, SuggestBackgroundPRTool. Internal
+  tools never available in external builds.
+
+## Source Extraction Summary
+
+| Source | Method | Files | What |
+|--------|--------|-------|------|
+| Original leak | .map file on R2 bucket | 1,929 | Full src/ directory |
+| npm source map | `cli.js.map` in `@anthropic-ai/claude-code` | 4,756 total | Everything bundled into the CLI |
+| npm source map | Same file, `@ant/*` entries | 20 | Computer use + Chrome (private, not on npm) |
+| npm source map | Same file, `@anthropic-ai/*` entries | 105 | SDK, sandbox, mcpb, bedrock, vertex, foundry |
+| npm registry | `npm pack @anthropic-ai/tokenizer` | 15 | Claude's BPE tokenizer + 64,739-token vocabulary |
+| npm registry | `npm pack @anthropic-ai/claude-trace` | 6 | OTEL session trace viewer |
+| npm registry | `npm pack @anthropic-ai/claude-agent-sdk` | 18 | Agent SDK source + types |
+| npm registry | `npm pack @anthropic-ai/sandbox-runtime` | 10 | Extra files not in source map (parent-proxy, seccomp C source) |
+| GCS bucket | `storage.googleapis.com/claude-code-dist-*` | 334 | Official plugin marketplace (32 plugins) |
+| GCS bucket | Same bucket, `manifest.json` per version | 228 versions | Native binary manifests (all platforms, checksums) |
+
+## All @anthropic-ai npm Packages (as of 2026-03-31)
+
+| Package | On npm? | In our repo? | Status |
+|---------|---------|-------------|--------|
+| `@anthropic-ai/claude-code` | Yes | src/ + stubs/ | **Full source extracted** |
+| `@anthropic-ai/claude-agent-sdk` | Yes | stubs/downloads/ | **Downloaded** |
+| `@anthropic-ai/sdk` | Yes | stubs/@anthropic-ai/sdk/ | **Source from map + npm install** |
+| `@anthropic-ai/bedrock-sdk` | Yes | stubs/@anthropic-ai/bedrock-sdk/ | **Source from map + npm install** |
+| `@anthropic-ai/vertex-sdk` | Yes | stubs/@anthropic-ai/vertex-sdk/ | **Source from map + npm install** |
+| `@anthropic-ai/foundry-sdk` | Yes | stubs/@anthropic-ai/foundry-sdk/ | **Source from map + npm install** |
+| `@anthropic-ai/sandbox-runtime` | Yes | stubs/@anthropic-ai/sandbox-runtime/ | **Source from map + npm + extras** |
+| `@anthropic-ai/mcpb` | Yes | stubs/@anthropic-ai/mcpb/ | **Source from map + npm install** |
+| `@anthropic-ai/tokenizer` | Yes | stubs/downloads/tokenizer/ | **Downloaded** |
+| `@anthropic-ai/claude-trace` | Yes | stubs/downloads/claude-trace/ | **Downloaded** |
+| `@ant/computer-use-mcp` | **No** (private) | stubs/@ant/computer-use-mcp/ | **Source from map** |
+| `@ant/claude-for-chrome-mcp` | **No** (private) | stubs/@ant/claude-for-chrome-mcp/ | **Source from map** |
+| `@ant/computer-use-swift` | **No** (private) | stubs/@ant/computer-use-swift/ | **JS loader only** (binary missing) |
+| `@ant/computer-use-input` | **No** (private) | stubs/@ant/computer-use-input/ | **JS loader only** (binary missing) |
+
+## Open GCS Bucket (no auth required)
+
+```
+https://storage.googleapis.com/claude-code-dist-86c565f3-f756-42ad-8dfa-d59b1c096819/
+├── claude-code-releases/
+│   ├── {version}/                 # 228 versions (1.0.100 → 2.1.88)
+│   │   ├── manifest.json          # Platform checksums and sizes
+│   │   ├── darwin-arm64/claude    # macOS ARM binary
+│   │   ├── darwin-x64/claude      # macOS Intel binary
+│   │   ├── linux-arm64/claude     # Linux ARM binary
+│   │   ├── linux-x64/claude       # Linux x64 binary
+│   │   ├── win32-x64/claude.exe   # Windows binary
+│   │   └── ...
+│   └── plugins/
+│       └── claude-plugins-official/
+│           ├── latest             # Points to current hash
+│           └── {hash}.zip         # Plugin marketplace bundles
+└── test-uploads/                  # Just a test.txt
+```
+
+## Keeping This File Updated
+
+**When you modify this repo, update this file:**
+- Added a new stub? Add it to the structure tree with `[STUB]` tag
+- Extracted new source? Add to extraction summary table
+- Found a new npm package? Add to the packages table
+- Changed what works/doesn't? Update the status section
+- New build steps? Update "How to Build & Run"

+ 21 - 0
LICENSE

@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 fazxes
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

+ 92 - 0
README.md

@@ -0,0 +1,92 @@
+# Claude Code — Rebuilt from Leaked Source
+
+On March 31, 2026, the full source code of Anthropic's Claude Code CLI was leaked via a `.map` file exposed in their npm registry. This repo contains that source rebuilt into a runnable CLI.
+
+## Quick Start
+
+**Prerequisites:** [Bun](https://bun.sh) v1.1+
+
+```bash
+git clone https://github.com/fazxes/claude-code.git
+cd claude-code
+bun install
+bun run build
+bun dist/cli.js
+```
+
+That's it. The CLI will launch and prompt you to authenticate via OAuth (same flow as the official Claude Code).
+
+## Commands
+
+```bash
+bun dist/cli.js                    # Launch interactive REPL
+bun dist/cli.js --help             # Show all options
+bun dist/cli.js --version          # Show version
+bun dist/cli.js -p "your prompt"   # Non-interactive mode (pipe-friendly)
+bun dist/cli.js auth login         # Authenticate
+```
+
+## How It Was Leaked
+
+[Chaofan Shou (@Fried_rice)](https://x.com/Fried_rice) discovered the leak:
+
+> **"Claude code source code has been leaked via a map file in their npm registry!"**
+>
+> — [@Fried_rice, March 31, 2026](https://x.com/Fried_rice/status/2038894956459290963)
+
+The source map in the published npm package contained a reference to the full, unobfuscated TypeScript source, downloadable as a zip from Anthropic's R2 storage bucket.
+
+## What's Inside
+
+- **~1,900 source files**, 512,000+ lines of TypeScript
+- **Runtime:** Bun
+- **Terminal UI:** React + custom forked [Ink](https://github.com/vadimdemedes/ink)
+- **CLI parser:** Commander.js
+- **Layout engine:** Pure TypeScript port of Yoga (Meta's flexbox engine)
+
+### Architecture
+
+```
+src/
+├── main.tsx                 # Entrypoint (Commander.js CLI parser)
+├── commands.ts              # Command registry
+├── tools.ts                 # Tool registry (~40 tools)
+├── QueryEngine.ts           # LLM query engine (Anthropic API)
+├── context.ts               # System/user context collection
+├── ink/                     # Custom Ink fork (terminal React renderer)
+├── commands/                # Slash command implementations
+├── tools/                   # Agent tool implementations
+├── components/              # React UI components
+├── services/                # API, MCP, OAuth, telemetry
+├── screens/                 # Full-screen UIs (REPL, Doctor)
+├── native-ts/               # Pure TS ports of native modules
+│   ├── yoga-layout/         # Flexbox layout engine
+│   ├── color-diff/          # Syntax-highlighted diffs
+│   └── file-index/          # Fuzzy file search
+└── vim/                     # Vim mode implementation
+```
+
+## What's Stubbed Out
+
+Some internal Anthropic features weren't included in the leak or are behind private packages. These are stubbed with no-ops:
+
+- **Computer Use** (`@ant/computer-use-*`) — screen control tools
+- **Chrome Integration** (`@ant/claude-for-chrome-mcp`) — browser automation
+- **Sandbox Runtime** (`@anthropic-ai/sandbox-runtime`) — sandboxed execution
+- **TungstenTool, REPLTool** — internal-only tools
+- **Context Collapse** — internal compaction feature
+
+The core CLI, all standard tools (Bash, Edit, Read, Write, Grep, Glob, etc.), MCP support, and the full terminal UI work.
+
+## Build Details
+
+The build script (`build.ts`) uses Bun's bundler to:
+1. Bundle 4,500+ modules into a single `dist/cli.js` (~21 MB)
+2. Define `MACRO.*` build-time constants (version, feedback channel)
+3. Externalize optional native deps (`sharp`, `react-devtools-core`)
+
+Feature flags from `bun:bundle`'s `feature()` all return `false` — internal Anthropic features (voice mode, coordinator mode, etc.) are disabled.
+
+## License
+
+This is leaked proprietary source code from Anthropic. Use at your own discretion.

+ 38 - 0
build.ts

@@ -0,0 +1,38 @@
+#!/usr/bin/env bun
+/**
+ * Build script for Claude Code from leaked source.
+ *
+ * Bundles src/entrypoints/cli.tsx into dist/cli.js with Bun.build,
+ * injecting the MACRO.* build-time constants declared in shims/globals.d.ts.
+ *
+ * Usage: bun build.ts
+ */
+import { $ } from 'bun';
+// NOTE(review): `$` (Bun shell) is imported but never used in this script —
+// confirm whether it can be removed.
+
+// VERSION env var overrides the default release number; the build timestamp
+// is captured once so every MACRO.BUILD_TIME reference agrees.
+const version = process.env.VERSION || '2.1.88';
+const buildTime = new Date().toISOString();
+
+console.log(`Building Claude Code v${version}...`);
+
+const result = await Bun.build({
+  entrypoints: ['src/entrypoints/cli.tsx'],
+  outdir: 'dist',
+  target: 'bun',
+  sourcemap: 'linked',
+  // Compile-time constant substitution: occurrences of MACRO.* in the source
+  // are replaced with these JSON-stringified literals.
+  define: {
+    'MACRO.VERSION': JSON.stringify(version),
+    'MACRO.BUILD_TIME': JSON.stringify(buildTime),
+    'MACRO.FEEDBACK_CHANNEL': JSON.stringify('#claude-code'),
+    'MACRO.ISSUES_EXPLAINER': JSON.stringify(
+      'report the issue at https://github.com/anthropics/claude-code/issues',
+    ),
+  },
+  // Optional native deps stay external so the bundle loads without them.
+  external: ['react-devtools-core', 'sharp'],
+});
+
+if (!result.success) {
+  console.error('Build failed:');
+  for (const log of result.logs) {
+    console.error(log);
+  }
+  process.exit(1);
+}
+
+// NOTE(review): assumes result.outputs[0] is dist/cli.js — verify the output
+// ordering still holds if additional entrypoints are ever added.
+console.log(`Build succeeded: dist/cli.js (${(result.outputs[0]!.size / 1024 / 1024).toFixed(1)} MB)`);

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 141 - 0
bun.lock


+ 118 - 0
package.json

@@ -0,0 +1,118 @@
+{
+  "name": "claude-code",
+  "version": "2.1.88",
+  "type": "module",
+  "private": true,
+  "bin": {
+    "claude": "./dist/cli.js"
+  },
+  "dependencies": {
+    "@alcalzone/ansi-tokenize": "^0.1.0",
+    "@anthropic-ai/bedrock-sdk": "^0.26.4",
+    "@anthropic-ai/claude-agent-sdk": "^0.1.0",
+    "@anthropic-ai/foundry-sdk": "^0.2.3",
+    "@anthropic-ai/mcpb": "^2.1.2",
+    "@anthropic-ai/sandbox-runtime": "^0.0.44",
+    "@anthropic-ai/sdk": "^0.52.0",
+    "@anthropic-ai/vertex-sdk": "^0.14.4",
+    "@aws-sdk/client-bedrock": "^3.1020.0",
+    "@aws-sdk/client-bedrock-runtime": "^3.700.0",
+    "@aws-sdk/client-sts": "^3.1020.0",
+    "@azure/identity": "^4.13.1",
+    "@commander-js/extra-typings": "12.1.0",
+    "@growthbook/growthbook": "^1.3.0",
+    "@modelcontextprotocol/sdk": "^1.12.0",
+    "@opentelemetry/api": "^1.9.0",
+    "@opentelemetry/api-logs": "^0.57.0",
+    "@opentelemetry/core": "^1.30.0",
+    "@opentelemetry/exporter-logs-otlp-grpc": "^0.214.0",
+    "@opentelemetry/exporter-logs-otlp-http": "^0.214.0",
+    "@opentelemetry/exporter-logs-otlp-proto": "^0.214.0",
+    "@opentelemetry/exporter-metrics-otlp-grpc": "^0.214.0",
+    "@opentelemetry/exporter-metrics-otlp-http": "^0.214.0",
+    "@opentelemetry/exporter-metrics-otlp-proto": "^0.214.0",
+    "@opentelemetry/exporter-prometheus": "^0.214.0",
+    "@opentelemetry/exporter-trace-otlp-grpc": "^0.214.0",
+    "@opentelemetry/exporter-trace-otlp-http": "^0.214.0",
+    "@opentelemetry/exporter-trace-otlp-proto": "^0.214.0",
+    "@opentelemetry/resources": "^2.6.1",
+    "@opentelemetry/sdk-logs": "^0.57.0",
+    "@opentelemetry/sdk-metrics": "^1.30.0",
+    "@opentelemetry/sdk-trace-base": "^1.30.0",
+    "@opentelemetry/semantic-conventions": "^1.28.0",
+    "ajv": "^8.17.0",
+    "asciichart": "^1.5.0",
+    "auto-bind": "^5.0.0",
+    "axios": "^1.7.0",
+    "bidi-js": "^1.0.0",
+    "chalk": "^5.4.0",
+    "chokidar": "^4.0.0",
+    "cli-boxes": "^3.0.0",
+    "code-excerpt": "^4.0.0",
+    "commander": "12.1.0",
+    "diff": "^7.0.0",
+    "emoji-regex": "^10.4.0",
+    "env-paths": "^3.0.0",
+    "execa": "^9.5.0",
+    "fflate": "^0.8.2",
+    "figures": "^6.1.0",
+    "fuse.js": "^7.0.0",
+    "get-east-asian-width": "^1.3.0",
+    "google-auth-library": "^9.15.0",
+    "highlight.js": "^11.11.0",
+    "https-proxy-agent": "^7.0.0",
+    "ignore": "^7.0.0",
+    "indent-string": "^5.0.0",
+    "jsonc-parser": "^3.3.0",
+    "lodash-es": "^4.17.0",
+    "lru-cache": "^11.0.0",
+    "marked": "^15.0.0",
+    "p-map": "^7.0.0",
+    "picomatch": "^4.0.0",
+    "proper-lockfile": "^4.1.0",
+    "qrcode": "^1.5.0",
+    "react": "^19.1.0",
+    "react-reconciler": "^0.33.0",
+    "semver": "^7.7.0",
+    "sharp": "^0.34.5",
+    "shell-quote": "^1.8.0",
+    "signal-exit": "^4.1.0",
+    "stack-utils": "^2.0.0",
+    "strip-ansi": "^7.1.0",
+    "supports-hyperlinks": "^3.1.0",
+    "tree-kill": "^1.2.0",
+    "turndown": "^7.2.2",
+    "type-fest": "^4.32.0",
+    "undici": "^7.3.0",
+    "usehooks-ts": "^3.1.0",
+    "vscode-jsonrpc": "^8.2.0",
+    "vscode-languageserver-protocol": "^3.17.0",
+    "vscode-languageserver-types": "^3.17.0",
+    "wrap-ansi": "^9.0.0",
+    "ws": "^8.18.0",
+    "xss": "^1.0.0",
+    "yaml": "^2.8.3",
+    "zod": "^3.25.0"
+  },
+  "devDependencies": {
+    "@types/bun": "^1.2.0",
+    "@types/diff": "^7.0.0",
+    "@types/lodash-es": "^4.17.0",
+    "@types/node": "^22.0.0",
+    "@types/picomatch": "^3.0.0",
+    "@types/proper-lockfile": "^4.1.0",
+    "@types/qrcode": "^1.5.0",
+    "@types/react": "^19.0.0",
+    "@types/react-reconciler": "^0.28.0",
+    "@types/semver": "^7.5.0",
+    "@types/shell-quote": "^1.7.0",
+    "@types/stack-utils": "^2.0.0",
+    "@types/ws": "^8.5.0",
+    "typescript": "^5.7.0"
+  },
+  "scripts": {
+    "build": "bun build.ts",
+    "start": "bun dist/cli.js",
+    "typecheck": "tsc --noEmit"
+  }
+}

+ 35 - 0
scripts/generate-sdk-types.ts

@@ -0,0 +1,35 @@
+/**
+ * Generates src/entrypoints/sdk/coreTypes.generated.ts from the Zod schemas
+ * in coreSchemas.ts. Derives TypeScript types via z.infer.
+ *
+ * Purely textual: scans coreSchemas.ts for `export const XxxSchema`
+ * declarations and emits one `export type Xxx = ...` line per match.
+ */
+import { readFileSync, writeFileSync } from 'fs';
+import { join } from 'path';
+
+const schemasPath = join(import.meta.dir, '../src/entrypoints/sdk/coreSchemas.ts');
+const outputPath = join(import.meta.dir, '../src/entrypoints/sdk/coreTypes.generated.ts');
+
+const content = readFileSync(schemasPath, 'utf-8');
+
+// Extract all exported schema names
+// Only declarations at the start of a line are matched (^ with the m flag),
+// so indented/nested consts are deliberately ignored.
+const schemaNames: string[] = [];
+const regex = /^export const (\w+Schema)\b/gm;
+let match: RegExpExecArray | null;
+while ((match = regex.exec(content)) !== null) {
+  // Group 1 always participates in a successful match, so the `!` is safe.
+  schemaNames.push(match[1]!);
+}
+
+// Generate type exports
+const lines = [
+  '// AUTO-GENERATED from coreSchemas.ts — do not edit manually',
+  "import { z } from 'zod/v4'",
+  "import * as schemas from './coreSchemas.js'",
+  '',
+];
+
+for (const schemaName of schemaNames) {
+  const typeName = schemaName.replace(/Schema$/, '');
+  // NOTE(review): ReturnType<typeof ...> assumes every exported *Schema is a
+  // factory function returning a Zod schema (not a plain schema value) —
+  // confirm against coreSchemas.ts; plain schemas would need z.infer<typeof X>.
+  lines.push(`export type ${typeName} = z.infer<ReturnType<typeof schemas.${schemaName}>>;`);
+}
+
+writeFileSync(outputPath, lines.join('\n') + '\n');
+console.log(`Generated ${schemaNames.length} types to ${outputPath}`);

+ 3 - 0
shims/bun-bundle.d.ts

@@ -0,0 +1,3 @@
+// Ambient module declaration so `import { feature } from 'bun:bundle'`
+// typechecks outside Bun's bundler; the runtime stub lives in ./bun-bundle.ts.
+declare module 'bun:bundle' {
+  export function feature(name: string): boolean;
+}

+ 5 - 0
shims/bun-bundle.ts

@@ -0,0 +1,5 @@
+// Runtime shim for bun:bundle - all feature flags return false
+// (internal/ant-only features won't be available)
+/**
+ * Stand-in for Bun's bundler `feature()` macro.
+ * @param _name - Feature flag name; ignored by this shim.
+ * @returns Always `false`, so every flag-gated code path is disabled.
+ */
+export function feature(_name: string): boolean {
+  return false;
+}

+ 7 - 0
shims/globals.d.ts

@@ -0,0 +1,7 @@
+// Build-time macro declarations
+// Concrete values are injected at bundle time by build.ts via Bun.build's
+// `define` option; this declaration only exists so references typecheck.
+declare const MACRO: {
+  VERSION: string;
+  BUILD_TIME: string;
+  FEEDBACK_CHANNEL: string;
+  ISSUES_EXPLAINER: string;
+};

+ 1295 - 0
src/QueryEngine.ts

@@ -0,0 +1,1295 @@
+import { feature } from 'bun:bundle'
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import { randomUUID } from 'crypto'
+import last from 'lodash-es/last.js'
+import {
+  getSessionId,
+  isSessionPersistenceDisabled,
+} from 'src/bootstrap/state.js'
+import type {
+  PermissionMode,
+  SDKCompactBoundaryMessage,
+  SDKMessage,
+  SDKPermissionDenial,
+  SDKStatus,
+  SDKUserMessageReplay,
+} from 'src/entrypoints/agentSdkTypes.js'
+import { accumulateUsage, updateUsage } from 'src/services/api/claude.js'
+import type { NonNullableUsage } from 'src/services/api/logging.js'
+import { EMPTY_USAGE } from 'src/services/api/logging.js'
+import stripAnsi from 'strip-ansi'
+import type { Command } from './commands.js'
+import { getSlashCommandToolSkills } from './commands.js'
+import {
+  LOCAL_COMMAND_STDERR_TAG,
+  LOCAL_COMMAND_STDOUT_TAG,
+} from './constants/xml.js'
+import {
+  getModelUsage,
+  getTotalAPIDuration,
+  getTotalCost,
+} from './cost-tracker.js'
+import type { CanUseToolFn } from './hooks/useCanUseTool.js'
+import { loadMemoryPrompt } from './memdir/memdir.js'
+import { hasAutoMemPathOverride } from './memdir/paths.js'
+import { query } from './query.js'
+import { categorizeRetryableAPIError } from './services/api/errors.js'
+import type { MCPServerConnection } from './services/mcp/types.js'
+import type { AppState } from './state/AppState.js'
+import { type Tools, type ToolUseContext, toolMatchesName } from './Tool.js'
+import type { AgentDefinition } from './tools/AgentTool/loadAgentsDir.js'
+import { SYNTHETIC_OUTPUT_TOOL_NAME } from './tools/SyntheticOutputTool/SyntheticOutputTool.js'
+import type { Message } from './types/message.js'
+import type { OrphanedPermission } from './types/textInputTypes.js'
+import { createAbortController } from './utils/abortController.js'
+import type { AttributionState } from './utils/commitAttribution.js'
+import { getGlobalConfig } from './utils/config.js'
+import { getCwd } from './utils/cwd.js'
+import { isBareMode, isEnvTruthy } from './utils/envUtils.js'
+import { getFastModeState } from './utils/fastMode.js'
+import {
+  type FileHistoryState,
+  fileHistoryEnabled,
+  fileHistoryMakeSnapshot,
+} from './utils/fileHistory.js'
+import {
+  cloneFileStateCache,
+  type FileStateCache,
+} from './utils/fileStateCache.js'
+import { headlessProfilerCheckpoint } from './utils/headlessProfiler.js'
+import { registerStructuredOutputEnforcement } from './utils/hooks/hookHelpers.js'
+import { getInMemoryErrors } from './utils/log.js'
+import { countToolCalls, SYNTHETIC_MESSAGES } from './utils/messages.js'
+import {
+  getMainLoopModel,
+  parseUserSpecifiedModel,
+} from './utils/model/model.js'
+import { loadAllPluginsCacheOnly } from './utils/plugins/pluginLoader.js'
+import {
+  type ProcessUserInputContext,
+  processUserInput,
+} from './utils/processUserInput/processUserInput.js'
+import { fetchSystemPromptParts } from './utils/queryContext.js'
+import { setCwd } from './utils/Shell.js'
+import {
+  flushSessionStorage,
+  recordTranscript,
+} from './utils/sessionStorage.js'
+import { asSystemPrompt } from './utils/systemPromptType.js'
+import { resolveThemeSetting } from './utils/systemTheme.js'
+import {
+  shouldEnableThinkingByDefault,
+  type ThinkingConfig,
+} from './utils/thinking.js'
+
+// Lazy: MessageSelector.tsx pulls React/ink; only needed for message filtering at query time
+// (the require() happens on each call, keeping the UI dependency off the
+// module-load path; Node's module cache makes repeated calls cheap).
+/* eslint-disable @typescript-eslint/no-require-imports */
+const messageSelector =
+  (): typeof import('src/components/MessageSelector.js') =>
+    require('src/components/MessageSelector.js')
+
+import {
+  localCommandOutputToSDKAssistantMessage,
+  toSDKCompactMetadata,
+} from './utils/messages/mappers.js'
+import {
+  buildSystemInitMessage,
+  sdkCompatToolName,
+} from './utils/messages/systemInit.js'
+import {
+  getScratchpadDir,
+  isScratchpadEnabled,
+} from './utils/permissions/filesystem.js'
+/* eslint-enable @typescript-eslint/no-require-imports */
+import {
+  handleOrphanedPermission,
+  isResultSuccessful,
+  normalizeMessage,
+} from './utils/queryHelpers.js'
+
+// Dead code elimination: conditional import for coordinator mode
+// When COORDINATOR_MODE is off, feature() evaluates to false (see
+// shims/bun-bundle.ts) and the fallback returns {}, so spreading the result
+// into userContext is a harmless no-op.
+/* eslint-disable @typescript-eslint/no-require-imports */
+const getCoordinatorUserContext: (
+  mcpClients: ReadonlyArray<{ name: string }>,
+  scratchpadDir?: string,
+) => { [k: string]: string } = feature('COORDINATOR_MODE')
+  ? require('./coordinator/coordinatorMode.js').getCoordinatorUserContext
+  : () => ({})
+/* eslint-enable @typescript-eslint/no-require-imports */
+
+// Dead code elimination: conditional import for snip compaction
+// Both consts are null when HISTORY_SNIP is off — callers must null-check.
+/* eslint-disable @typescript-eslint/no-require-imports */
+const snipModule = feature('HISTORY_SNIP')
+  ? (require('./services/compact/snipCompact.js') as typeof import('./services/compact/snipCompact.js'))
+  : null
+const snipProjection = feature('HISTORY_SNIP')
+  ? (require('./services/compact/snipProjection.js') as typeof import('./services/compact/snipProjection.js'))
+  : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+
+/**
+ * Configuration for a QueryEngine (one instance per conversation).
+ * Fields are documented only where their use is visible in submitMessage;
+ * the rest are passed through to processUserInput/query unchanged.
+ */
+export type QueryEngineConfig = {
+  cwd: string
+  tools: Tools
+  commands: Command[]
+  mcpClients: MCPServerConnection[]
+  agents: AgentDefinition[]
+  // Permission gate for tool use; QueryEngine wraps it to record denials.
+  canUseTool: CanUseToolFn
+  getAppState: () => AppState
+  setAppState: (f: (prev: AppState) => AppState) => void
+  // Seeds the engine's message store; defaults to [] in the constructor.
+  initialMessages?: Message[]
+  // Shared file-state cache handed to tool-use contexts.
+  // NOTE(review): presumably used for edit-staleness detection — confirm.
+  readFileCache: FileStateCache
+  // When a string, fully replaces the default system prompt.
+  customSystemPrompt?: string
+  // Appended after the (custom or default) system prompt.
+  appendSystemPrompt?: string
+  // Parsed via parseUserSpecifiedModel; otherwise getMainLoopModel() is used.
+  userSpecifiedModel?: string
+  fallbackModel?: string
+  thinkingConfig?: ThinkingConfig
+  maxTurns?: number
+  maxBudgetUsd?: number
+  taskBudget?: { total: number }
+  // With the SyntheticOutput tool present, enables structured-output
+  // enforcement and retry limiting on its tool calls.
+  jsonSchema?: Record<string, unknown>
+  verbose?: boolean
+  // When true, user-authored input messages are replayed (acked) to the SDK.
+  replayUserMessages?: boolean
+  /** Handler for URL elicitations triggered by MCP tool -32042 errors. */
+  handleElicitation?: ToolUseContext['handleElicitation']
+  includePartialMessages?: boolean
+  setSDKStatus?: (status: SDKStatus) => void
+  // Used as-is when provided; otherwise the engine creates its own.
+  abortController?: AbortController
+  // Handled at most once per engine lifetime, on the first submitMessage.
+  orphanedPermission?: OrphanedPermission
+  /**
+   * Snip-boundary handler: receives each yielded system message plus the
+   * current mutableMessages store. Returns undefined if the message is not a
+   * snip boundary; otherwise returns the replayed snip result. Injected by
+   * ask() when HISTORY_SNIP is enabled so feature-gated strings stay inside
+   * the gated module (keeps QueryEngine free of excluded strings and testable
+   * despite feature() returning false under bun test). SDK-only: the REPL
+   * keeps full history for UI scrollback and projects on demand via
+   * projectSnippedView; QueryEngine truncates here to bound memory in long
+   * headless sessions (no UI to preserve).
+   */
+  snipReplay?: (
+    yieldedSystemMsg: Message,
+    store: Message[],
+  ) => { messages: Message[]; executed: boolean } | undefined
+}
+
+/**
+ * QueryEngine owns the query lifecycle and session state for a conversation.
+ * It extracts the core logic from ask() into a standalone class that can be
+ * used by both the headless/SDK path and (in a future phase) the REPL.
+ *
+ * One QueryEngine per conversation. Each submitMessage() call starts a new
+ * turn within the same conversation. State (messages, file cache, usage, etc.)
+ * persists across turns.
+ */
+export class QueryEngine {
+  private config: QueryEngineConfig
+  private mutableMessages: Message[]
+  private abortController: AbortController
+  private permissionDenials: SDKPermissionDenial[]
+  private totalUsage: NonNullableUsage
+  private hasHandledOrphanedPermission = false
+  private readFileState: FileStateCache
+  // Turn-scoped skill discovery tracking (feeds was_discovered on
+  // tengu_skill_tool_invocation). Must persist across the two
+  // processUserInputContext rebuilds inside submitMessage, but is cleared
+  // at the start of each submitMessage to avoid unbounded growth across
+  // many turns in SDK mode.
+  private discoveredSkillNames = new Set<string>()
+  private loadedNestedMemoryPaths = new Set<string>()
+
+  constructor(config: QueryEngineConfig) {
+    this.config = config
+    this.mutableMessages = config.initialMessages ?? []
+    this.abortController = config.abortController ?? createAbortController()
+    this.permissionDenials = []
+    this.readFileState = config.readFileCache
+    this.totalUsage = EMPTY_USAGE
+  }
+
+  async *submitMessage(
+    prompt: string | ContentBlockParam[],
+    options?: { uuid?: string; isMeta?: boolean },
+  ): AsyncGenerator<SDKMessage, void, unknown> {
+    const {
+      cwd,
+      commands,
+      tools,
+      mcpClients,
+      verbose = false,
+      thinkingConfig,
+      maxTurns,
+      maxBudgetUsd,
+      taskBudget,
+      canUseTool,
+      customSystemPrompt,
+      appendSystemPrompt,
+      userSpecifiedModel,
+      fallbackModel,
+      jsonSchema,
+      getAppState,
+      setAppState,
+      replayUserMessages = false,
+      includePartialMessages = false,
+      agents = [],
+      setSDKStatus,
+      orphanedPermission,
+    } = this.config
+
+    this.discoveredSkillNames.clear()
+    setCwd(cwd)
+    const persistSession = !isSessionPersistenceDisabled()
+    const startTime = Date.now()
+
+    // Wrap canUseTool to track permission denials
+    const wrappedCanUseTool: CanUseToolFn = async (
+      tool,
+      input,
+      toolUseContext,
+      assistantMessage,
+      toolUseID,
+      forceDecision,
+    ) => {
+      const result = await canUseTool(
+        tool,
+        input,
+        toolUseContext,
+        assistantMessage,
+        toolUseID,
+        forceDecision,
+      )
+
+      // Track denials for SDK reporting
+      if (result.behavior !== 'allow') {
+        this.permissionDenials.push({
+          tool_name: sdkCompatToolName(tool.name),
+          tool_use_id: toolUseID,
+          tool_input: input,
+        })
+      }
+
+      return result
+    }
+
+    const initialAppState = getAppState()
+    const initialMainLoopModel = userSpecifiedModel
+      ? parseUserSpecifiedModel(userSpecifiedModel)
+      : getMainLoopModel()
+
+    const initialThinkingConfig: ThinkingConfig = thinkingConfig
+      ? thinkingConfig
+      : shouldEnableThinkingByDefault() !== false
+        ? { type: 'adaptive' }
+        : { type: 'disabled' }
+
+    headlessProfilerCheckpoint('before_getSystemPrompt')
+    // Narrow once so TS tracks the type through the conditionals below.
+    const customPrompt =
+      typeof customSystemPrompt === 'string' ? customSystemPrompt : undefined
+    const {
+      defaultSystemPrompt,
+      userContext: baseUserContext,
+      systemContext,
+    } = await fetchSystemPromptParts({
+      tools,
+      mainLoopModel: initialMainLoopModel,
+      additionalWorkingDirectories: Array.from(
+        initialAppState.toolPermissionContext.additionalWorkingDirectories.keys(),
+      ),
+      mcpClients,
+      customSystemPrompt: customPrompt,
+    })
+    headlessProfilerCheckpoint('after_getSystemPrompt')
+    const userContext = {
+      ...baseUserContext,
+      ...getCoordinatorUserContext(
+        mcpClients,
+        isScratchpadEnabled() ? getScratchpadDir() : undefined,
+      ),
+    }
+
+    // When an SDK caller provides a custom system prompt AND has set
+    // CLAUDE_COWORK_MEMORY_PATH_OVERRIDE, inject the memory-mechanics prompt.
+    // The env var is an explicit opt-in signal — the caller has wired up
+    // a memory directory and needs Claude to know how to use it (which
+    // Write/Edit tools to call, MEMORY.md filename, loading semantics).
+    // The caller can layer their own policy text via appendSystemPrompt.
+    const memoryMechanicsPrompt =
+      customPrompt !== undefined && hasAutoMemPathOverride()
+        ? await loadMemoryPrompt()
+        : null
+
+    const systemPrompt = asSystemPrompt([
+      ...(customPrompt !== undefined ? [customPrompt] : defaultSystemPrompt),
+      ...(memoryMechanicsPrompt ? [memoryMechanicsPrompt] : []),
+      ...(appendSystemPrompt ? [appendSystemPrompt] : []),
+    ])
+
+    // Register function hook for structured output enforcement
+    const hasStructuredOutputTool = tools.some(t =>
+      toolMatchesName(t, SYNTHETIC_OUTPUT_TOOL_NAME),
+    )
+    if (jsonSchema && hasStructuredOutputTool) {
+      registerStructuredOutputEnforcement(setAppState, getSessionId())
+    }
+
+    let processUserInputContext: ProcessUserInputContext = {
+      messages: this.mutableMessages,
+      // Slash commands that mutate the message array (e.g. /force-snip)
+      // call setMessages(fn).  In interactive mode this writes back to
+      // AppState; in print mode we write back to mutableMessages so the
+      // rest of the query loop (push at :389, snapshot at :392) sees
+      // the result.  The second processUserInputContext below (after
+      // slash-command processing) keeps the no-op — nothing else calls
+      // setMessages past that point.
+      setMessages: fn => {
+        this.mutableMessages = fn(this.mutableMessages)
+      },
+      onChangeAPIKey: () => {},
+      handleElicitation: this.config.handleElicitation,
+      options: {
+        commands,
+        debug: false, // we use stdout, so don't want to clobber it
+        tools,
+        verbose,
+        mainLoopModel: initialMainLoopModel,
+        thinkingConfig: initialThinkingConfig,
+        mcpClients,
+        mcpResources: {},
+        ideInstallationStatus: null,
+        isNonInteractiveSession: true,
+        customSystemPrompt,
+        appendSystemPrompt,
+        agentDefinitions: { activeAgents: agents, allAgents: [] },
+        theme: resolveThemeSetting(getGlobalConfig().theme),
+        maxBudgetUsd,
+      },
+      getAppState,
+      setAppState,
+      abortController: this.abortController,
+      readFileState: this.readFileState,
+      nestedMemoryAttachmentTriggers: new Set<string>(),
+      loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,
+      dynamicSkillDirTriggers: new Set<string>(),
+      discoveredSkillNames: this.discoveredSkillNames,
+      setInProgressToolUseIDs: () => {},
+      setResponseLength: () => {},
+      updateFileHistoryState: (
+        updater: (prev: FileHistoryState) => FileHistoryState,
+      ) => {
+        setAppState(prev => {
+          const updated = updater(prev.fileHistory)
+          if (updated === prev.fileHistory) return prev
+          return { ...prev, fileHistory: updated }
+        })
+      },
+      updateAttributionState: (
+        updater: (prev: AttributionState) => AttributionState,
+      ) => {
+        setAppState(prev => {
+          const updated = updater(prev.attribution)
+          if (updated === prev.attribution) return prev
+          return { ...prev, attribution: updated }
+        })
+      },
+      setSDKStatus,
+    }
+
+    // Handle orphaned permission (only once per engine lifetime)
+    if (orphanedPermission && !this.hasHandledOrphanedPermission) {
+      this.hasHandledOrphanedPermission = true
+      for await (const message of handleOrphanedPermission(
+        orphanedPermission,
+        tools,
+        this.mutableMessages,
+        processUserInputContext,
+      )) {
+        yield message
+      }
+    }
+
+    const {
+      messages: messagesFromUserInput,
+      shouldQuery,
+      allowedTools,
+      model: modelFromUserInput,
+      resultText,
+    } = await processUserInput({
+      input: prompt,
+      mode: 'prompt',
+      setToolJSX: () => {},
+      context: {
+        ...processUserInputContext,
+        messages: this.mutableMessages,
+      },
+      messages: this.mutableMessages,
+      uuid: options?.uuid,
+      isMeta: options?.isMeta,
+      querySource: 'sdk',
+    })
+
+    // Push new messages, including user input and any attachments
+    this.mutableMessages.push(...messagesFromUserInput)
+
+    // Update params to reflect updates from processing /slash commands
+    const messages = [...this.mutableMessages]
+
+    // Persist the user's message(s) to transcript BEFORE entering the query
+    // loop. The for-await below only calls recordTranscript when ask() yields
+    // an assistant/user/compact_boundary message — which doesn't happen until
+    // the API responds. If the process is killed before that (e.g. user clicks
+    // Stop in cowork seconds after send), the transcript is left with only
+    // queue-operation entries; getLastSessionLog filters those out, returns
+    // null, and --resume fails with "No conversation found". Writing now makes
+    // the transcript resumable from the point the user message was accepted,
+    // even if no API response ever arrives.
+    //
+    // --bare / SIMPLE: fire-and-forget. Scripted calls don't --resume after
+    // kill-mid-request. The await is ~4ms on SSD, ~30ms under disk contention
+    // — the single largest controllable critical-path cost after module eval.
+    // Transcript is still written (for post-hoc debugging); just not blocking.
+    if (persistSession && messagesFromUserInput.length > 0) {
+      const transcriptPromise = recordTranscript(messages)
+      if (isBareMode()) {
+        void transcriptPromise
+      } else {
+        await transcriptPromise
+        if (
+          isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+          isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+        ) {
+          await flushSessionStorage()
+        }
+      }
+    }
+
+    // Filter messages that should be acknowledged after transcript
+    const replayableMessages = messagesFromUserInput.filter(
+      msg =>
+        (msg.type === 'user' &&
+          !msg.isMeta && // Skip synthetic caveat messages
+          !msg.toolUseResult && // Skip tool results (they'll be acked from query)
+          messageSelector().selectableUserMessagesFilter(msg)) || // Skip non-user-authored messages (task notifications, etc.)
+        (msg.type === 'system' && msg.subtype === 'compact_boundary'), // Always ack compact boundaries
+    )
+    const messagesToAck = replayUserMessages ? replayableMessages : []
+
+    // Update the ToolPermissionContext based on user input processing (as necessary)
+    setAppState(prev => ({
+      ...prev,
+      toolPermissionContext: {
+        ...prev.toolPermissionContext,
+        alwaysAllowRules: {
+          ...prev.toolPermissionContext.alwaysAllowRules,
+          command: allowedTools,
+        },
+      },
+    }))
+
+    const mainLoopModel = modelFromUserInput ?? initialMainLoopModel
+
+    // Recreate after processing the prompt to pick up updated messages and
+    // model (from slash commands).
+    processUserInputContext = {
+      messages,
+      setMessages: () => {},
+      onChangeAPIKey: () => {},
+      handleElicitation: this.config.handleElicitation,
+      options: {
+        commands,
+        debug: false,
+        tools,
+        verbose,
+        mainLoopModel,
+        thinkingConfig: initialThinkingConfig,
+        mcpClients,
+        mcpResources: {},
+        ideInstallationStatus: null,
+        isNonInteractiveSession: true,
+        customSystemPrompt,
+        appendSystemPrompt,
+        theme: resolveThemeSetting(getGlobalConfig().theme),
+        agentDefinitions: { activeAgents: agents, allAgents: [] },
+        maxBudgetUsd,
+      },
+      getAppState,
+      setAppState,
+      abortController: this.abortController,
+      readFileState: this.readFileState,
+      nestedMemoryAttachmentTriggers: new Set<string>(),
+      loadedNestedMemoryPaths: this.loadedNestedMemoryPaths,
+      dynamicSkillDirTriggers: new Set<string>(),
+      discoveredSkillNames: this.discoveredSkillNames,
+      setInProgressToolUseIDs: () => {},
+      setResponseLength: () => {},
+      updateFileHistoryState: processUserInputContext.updateFileHistoryState,
+      updateAttributionState: processUserInputContext.updateAttributionState,
+      setSDKStatus,
+    }
+
+    headlessProfilerCheckpoint('before_skills_plugins')
+    // Cache-only: headless/SDK/CCR startup must not block on network for
+    // ref-tracked plugins. CCR populates the cache via CLAUDE_CODE_SYNC_PLUGIN_INSTALL
+    // (headlessPluginInstall) or CLAUDE_CODE_PLUGIN_SEED_DIR before this runs;
+    // SDK callers that need fresh source can call /reload-plugins.
+    const [skills, { enabled: enabledPlugins }] = await Promise.all([
+      getSlashCommandToolSkills(getCwd()),
+      loadAllPluginsCacheOnly(),
+    ])
+    headlessProfilerCheckpoint('after_skills_plugins')
+
+    yield buildSystemInitMessage({
+      tools,
+      mcpClients,
+      model: mainLoopModel,
+      permissionMode: initialAppState.toolPermissionContext
+        .mode as PermissionMode, // TODO: avoid the cast
+      commands,
+      agents,
+      skills,
+      plugins: enabledPlugins,
+      fastMode: initialAppState.fastMode,
+    })
+
+    // Record when system message is yielded for headless latency tracking
+    headlessProfilerCheckpoint('system_message_yielded')
+
+    if (!shouldQuery) {
+      // Return the results of local slash commands.
+      // Use messagesFromUserInput (not replayableMessages) for command output
+      // because selectableUserMessagesFilter excludes local-command-stdout tags.
+      for (const msg of messagesFromUserInput) {
+        if (
+          msg.type === 'user' &&
+          typeof msg.message.content === 'string' &&
+          (msg.message.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||
+            msg.message.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`) ||
+            msg.isCompactSummary)
+        ) {
+          yield {
+            type: 'user',
+            message: {
+              ...msg.message,
+              content: stripAnsi(msg.message.content),
+            },
+            session_id: getSessionId(),
+            parent_tool_use_id: null,
+            uuid: msg.uuid,
+            timestamp: msg.timestamp,
+            isReplay: !msg.isCompactSummary,
+            isSynthetic: msg.isMeta || msg.isVisibleInTranscriptOnly,
+          } as SDKUserMessageReplay
+        }
+
+        // Local command output — yield as a synthetic assistant message so
+        // RC renders it as assistant-style text rather than a user bubble.
+        // Emitted as assistant (not the dedicated SDKLocalCommandOutputMessage
+        // system subtype) so mobile clients + session-ingress can parse it.
+        if (
+          msg.type === 'system' &&
+          msg.subtype === 'local_command' &&
+          typeof msg.content === 'string' &&
+          (msg.content.includes(`<${LOCAL_COMMAND_STDOUT_TAG}>`) ||
+            msg.content.includes(`<${LOCAL_COMMAND_STDERR_TAG}>`))
+        ) {
+          yield localCommandOutputToSDKAssistantMessage(msg.content, msg.uuid)
+        }
+
+        if (msg.type === 'system' && msg.subtype === 'compact_boundary') {
+          yield {
+            type: 'system',
+            subtype: 'compact_boundary' as const,
+            session_id: getSessionId(),
+            uuid: msg.uuid,
+            compact_metadata: toSDKCompactMetadata(msg.compactMetadata),
+          } as SDKCompactBoundaryMessage
+        }
+      }
+
+      if (persistSession) {
+        await recordTranscript(messages)
+        if (
+          isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+          isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+        ) {
+          await flushSessionStorage()
+        }
+      }
+
+      yield {
+        type: 'result',
+        subtype: 'success',
+        is_error: false,
+        duration_ms: Date.now() - startTime,
+        duration_api_ms: getTotalAPIDuration(),
+        num_turns: messages.length - 1,
+        result: resultText ?? '',
+        stop_reason: null,
+        session_id: getSessionId(),
+        total_cost_usd: getTotalCost(),
+        usage: this.totalUsage,
+        modelUsage: getModelUsage(),
+        permission_denials: this.permissionDenials,
+        fast_mode_state: getFastModeState(
+          mainLoopModel,
+          initialAppState.fastMode,
+        ),
+        uuid: randomUUID(),
+      }
+      return
+    }
+
+    if (fileHistoryEnabled() && persistSession) {
+      messagesFromUserInput
+        .filter(messageSelector().selectableUserMessagesFilter)
+        .forEach(message => {
+          void fileHistoryMakeSnapshot(
+            (updater: (prev: FileHistoryState) => FileHistoryState) => {
+              setAppState(prev => ({
+                ...prev,
+                fileHistory: updater(prev.fileHistory),
+              }))
+            },
+            message.uuid,
+          )
+        })
+    }
+
+    // Track current message usage (reset on each message_start)
+    let currentMessageUsage: NonNullableUsage = EMPTY_USAGE
+    let turnCount = 1
+    let hasAcknowledgedInitialMessages = false
+    // Track structured output from StructuredOutput tool calls
+    let structuredOutputFromTool: unknown
+    // Track the last stop_reason from assistant messages
+    let lastStopReason: string | null = null
+    // Reference-based watermark so error_during_execution's errors[] is
+    // turn-scoped. A length-based index breaks when the 100-entry ring buffer
+    // shift()s during the turn — the index slides. If this entry is rotated
+    // out, lastIndexOf returns -1 and we include everything (safe fallback).
+    const errorLogWatermark = getInMemoryErrors().at(-1)
+    // Snapshot count before this query for delta-based retry limiting
+    const initialStructuredOutputCalls = jsonSchema
+      ? countToolCalls(this.mutableMessages, SYNTHETIC_OUTPUT_TOOL_NAME)
+      : 0
+
+    for await (const message of query({
+      messages,
+      systemPrompt,
+      userContext,
+      systemContext,
+      canUseTool: wrappedCanUseTool,
+      toolUseContext: processUserInputContext,
+      fallbackModel,
+      querySource: 'sdk',
+      maxTurns,
+      taskBudget,
+    })) {
+      // Record assistant, user, and compact boundary messages
+      if (
+        message.type === 'assistant' ||
+        message.type === 'user' ||
+        (message.type === 'system' && message.subtype === 'compact_boundary')
+      ) {
+        // Before writing a compact boundary, flush any in-memory-only
+        // messages up through the preservedSegment tail. Attachments and
+        // progress are now recorded inline (their switch cases below), but
+        // this flush still matters for the preservedSegment tail walk.
+        // If the SDK subprocess restarts before then (claude-desktop kills
+        // between turns), tailUuid points to a never-written message →
+        // applyPreservedSegmentRelinks fails its tail→head walk → returns
+        // without pruning → resume loads full pre-compact history.
+        if (
+          persistSession &&
+          message.type === 'system' &&
+          message.subtype === 'compact_boundary'
+        ) {
+          const tailUuid = message.compactMetadata?.preservedSegment?.tailUuid
+          if (tailUuid) {
+            const tailIdx = this.mutableMessages.findLastIndex(
+              m => m.uuid === tailUuid,
+            )
+            if (tailIdx !== -1) {
+              await recordTranscript(this.mutableMessages.slice(0, tailIdx + 1))
+            }
+          }
+        }
+        messages.push(message)
+        if (persistSession) {
+          // Fire-and-forget for assistant messages. claude.ts yields one
+          // assistant message per content block, then mutates the last
+          // one's message.usage/stop_reason on message_delta — relying on
+          // the write queue's 100ms lazy jsonStringify. Awaiting here
+          // blocks ask()'s generator, so message_delta can't run until
+          // every block is consumed; the drain timer (started at block 1)
+          // elapses first. Interactive CC doesn't hit this because
+          // useLogMessages.ts fire-and-forgets. enqueueWrite is
+          // order-preserving so fire-and-forget here is safe.
+          if (message.type === 'assistant') {
+            void recordTranscript(messages)
+          } else {
+            await recordTranscript(messages)
+          }
+        }
+
+        // Acknowledge initial user messages after first transcript recording
+        if (!hasAcknowledgedInitialMessages && messagesToAck.length > 0) {
+          hasAcknowledgedInitialMessages = true
+          for (const msgToAck of messagesToAck) {
+            if (msgToAck.type === 'user') {
+              yield {
+                type: 'user',
+                message: msgToAck.message,
+                session_id: getSessionId(),
+                parent_tool_use_id: null,
+                uuid: msgToAck.uuid,
+                timestamp: msgToAck.timestamp,
+                isReplay: true,
+              } as SDKUserMessageReplay
+            }
+          }
+        }
+      }
+
+      if (message.type === 'user') {
+        turnCount++
+      }
+
+      switch (message.type) {
+        case 'tombstone':
+          // Tombstone messages are control signals for removing messages, skip them
+          break
+        case 'assistant':
+          // Capture stop_reason if already set (synthetic messages). For
+          // streamed responses, this is null at content_block_stop time;
+          // the real value arrives via message_delta (handled below).
+          if (message.message.stop_reason != null) {
+            lastStopReason = message.message.stop_reason
+          }
+          this.mutableMessages.push(message)
+          yield* normalizeMessage(message)
+          break
+        case 'progress':
+          this.mutableMessages.push(message)
+          // Record inline so the dedup loop in the next ask() call sees it
+          // as already-recorded. Without this, deferred progress interleaves
+          // with already-recorded tool_results in mutableMessages, and the
+          // dedup walk freezes startingParentUuid at the wrong message —
+          // forking the chain and orphaning the conversation on resume.
+          if (persistSession) {
+            messages.push(message)
+            void recordTranscript(messages)
+          }
+          yield* normalizeMessage(message)
+          break
+        case 'user':
+          this.mutableMessages.push(message)
+          yield* normalizeMessage(message)
+          break
+        case 'stream_event':
+          if (message.event.type === 'message_start') {
+            // Reset current message usage for new message
+            currentMessageUsage = EMPTY_USAGE
+            currentMessageUsage = updateUsage(
+              currentMessageUsage,
+              message.event.message.usage,
+            )
+          }
+          if (message.event.type === 'message_delta') {
+            currentMessageUsage = updateUsage(
+              currentMessageUsage,
+              message.event.usage,
+            )
+            // Capture stop_reason from message_delta. The assistant message
+            // is yielded at content_block_stop with stop_reason=null; the
+            // real value only arrives here (see claude.ts message_delta
+            // handler). Without this, result.stop_reason is always null.
+            if (message.event.delta.stop_reason != null) {
+              lastStopReason = message.event.delta.stop_reason
+            }
+          }
+          if (message.event.type === 'message_stop') {
+            // Accumulate current message usage into total
+            this.totalUsage = accumulateUsage(
+              this.totalUsage,
+              currentMessageUsage,
+            )
+          }
+
+          if (includePartialMessages) {
+            yield {
+              type: 'stream_event' as const,
+              event: message.event,
+              session_id: getSessionId(),
+              parent_tool_use_id: null,
+              uuid: randomUUID(),
+            }
+          }
+
+          break
+        case 'attachment':
+          this.mutableMessages.push(message)
+          // Record inline (same reason as progress above).
+          if (persistSession) {
+            messages.push(message)
+            void recordTranscript(messages)
+          }
+
+          // Extract structured output from StructuredOutput tool calls
+          if (message.attachment.type === 'structured_output') {
+            structuredOutputFromTool = message.attachment.data
+          }
+          // Handle max turns reached signal from query.ts
+          else if (message.attachment.type === 'max_turns_reached') {
+            if (persistSession) {
+              if (
+                isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+                isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+              ) {
+                await flushSessionStorage()
+              }
+            }
+            yield {
+              type: 'result',
+              subtype: 'error_max_turns',
+              duration_ms: Date.now() - startTime,
+              duration_api_ms: getTotalAPIDuration(),
+              is_error: true,
+              num_turns: message.attachment.turnCount,
+              stop_reason: lastStopReason,
+              session_id: getSessionId(),
+              total_cost_usd: getTotalCost(),
+              usage: this.totalUsage,
+              modelUsage: getModelUsage(),
+              permission_denials: this.permissionDenials,
+              fast_mode_state: getFastModeState(
+                mainLoopModel,
+                initialAppState.fastMode,
+              ),
+              uuid: randomUUID(),
+              errors: [
+                `Reached maximum number of turns (${message.attachment.maxTurns})`,
+              ],
+            }
+            return
+          }
+          // Yield queued_command attachments as SDK user message replays
+          else if (
+            replayUserMessages &&
+            message.attachment.type === 'queued_command'
+          ) {
+            yield {
+              type: 'user',
+              message: {
+                role: 'user' as const,
+                content: message.attachment.prompt,
+              },
+              session_id: getSessionId(),
+              parent_tool_use_id: null,
+              uuid: message.attachment.source_uuid || message.uuid,
+              timestamp: message.timestamp,
+              isReplay: true,
+            } as SDKUserMessageReplay
+          }
+          break
+        case 'stream_request_start':
+          // Don't yield stream request start messages
+          break
+        case 'system': {
+          // Snip boundary: replay on our store to remove zombie messages and
+          // stale markers. The yielded boundary is a signal, not data to push —
+          // the replay produces its own equivalent boundary. Without this,
+          // markers persist and re-trigger on every turn, and mutableMessages
+          // never shrinks (memory leak in long SDK sessions). The subtype
+          // check lives inside the injected callback so feature-gated strings
+          // stay out of this file (excluded-strings check).
+          const snipResult = this.config.snipReplay?.(
+            message,
+            this.mutableMessages,
+          )
+          if (snipResult !== undefined) {
+            if (snipResult.executed) {
+              this.mutableMessages.length = 0
+              this.mutableMessages.push(...snipResult.messages)
+            }
+            break
+          }
+          this.mutableMessages.push(message)
+          // Yield compact boundary messages to SDK
+          if (
+            message.subtype === 'compact_boundary' &&
+            message.compactMetadata
+          ) {
+            // Release pre-compaction messages for GC. The boundary was just
+            // pushed so it's the last element. query.ts already uses
+            // getMessagesAfterCompactBoundary() internally, so only
+            // post-boundary messages are needed going forward.
+            const mutableBoundaryIdx = this.mutableMessages.length - 1
+            if (mutableBoundaryIdx > 0) {
+              this.mutableMessages.splice(0, mutableBoundaryIdx)
+            }
+            const localBoundaryIdx = messages.length - 1
+            if (localBoundaryIdx > 0) {
+              messages.splice(0, localBoundaryIdx)
+            }
+
+            yield {
+              type: 'system',
+              subtype: 'compact_boundary' as const,
+              session_id: getSessionId(),
+              uuid: message.uuid,
+              compact_metadata: toSDKCompactMetadata(message.compactMetadata),
+            }
+          }
+          if (message.subtype === 'api_error') {
+            yield {
+              type: 'system',
+              subtype: 'api_retry' as const,
+              attempt: message.retryAttempt,
+              max_retries: message.maxRetries,
+              retry_delay_ms: message.retryInMs,
+              error_status: message.error.status ?? null,
+              error: categorizeRetryableAPIError(message.error),
+              session_id: getSessionId(),
+              uuid: message.uuid,
+            }
+          }
+          // Don't yield other system messages in headless mode
+          break
+        }
+        case 'tool_use_summary':
+          // Yield tool use summary messages to SDK
+          yield {
+            type: 'tool_use_summary' as const,
+            summary: message.summary,
+            preceding_tool_use_ids: message.precedingToolUseIds,
+            session_id: getSessionId(),
+            uuid: message.uuid,
+          }
+          break
+      }
+
+      // Check if USD budget has been exceeded
+      if (maxBudgetUsd !== undefined && getTotalCost() >= maxBudgetUsd) {
+        if (persistSession) {
+          if (
+            isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+            isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+          ) {
+            await flushSessionStorage()
+          }
+        }
+        yield {
+          type: 'result',
+          subtype: 'error_max_budget_usd',
+          duration_ms: Date.now() - startTime,
+          duration_api_ms: getTotalAPIDuration(),
+          is_error: true,
+          num_turns: turnCount,
+          stop_reason: lastStopReason,
+          session_id: getSessionId(),
+          total_cost_usd: getTotalCost(),
+          usage: this.totalUsage,
+          modelUsage: getModelUsage(),
+          permission_denials: this.permissionDenials,
+          fast_mode_state: getFastModeState(
+            mainLoopModel,
+            initialAppState.fastMode,
+          ),
+          uuid: randomUUID(),
+          errors: [`Reached maximum budget ($${maxBudgetUsd})`],
+        }
+        return
+      }
+
+      // Check if structured output retry limit exceeded (only on user messages)
+      if (message.type === 'user' && jsonSchema) {
+        const currentCalls = countToolCalls(
+          this.mutableMessages,
+          SYNTHETIC_OUTPUT_TOOL_NAME,
+        )
+        const callsThisQuery = currentCalls - initialStructuredOutputCalls
+        const maxRetries = parseInt(
+          process.env.MAX_STRUCTURED_OUTPUT_RETRIES || '5',
+          10,
+        )
+        if (callsThisQuery >= maxRetries) {
+          if (persistSession) {
+            if (
+              isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+              isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+            ) {
+              await flushSessionStorage()
+            }
+          }
+          yield {
+            type: 'result',
+            subtype: 'error_max_structured_output_retries',
+            duration_ms: Date.now() - startTime,
+            duration_api_ms: getTotalAPIDuration(),
+            is_error: true,
+            num_turns: turnCount,
+            stop_reason: lastStopReason,
+            session_id: getSessionId(),
+            total_cost_usd: getTotalCost(),
+            usage: this.totalUsage,
+            modelUsage: getModelUsage(),
+            permission_denials: this.permissionDenials,
+            fast_mode_state: getFastModeState(
+              mainLoopModel,
+              initialAppState.fastMode,
+            ),
+            uuid: randomUUID(),
+            errors: [
+              `Failed to provide valid structured output after ${maxRetries} attempts`,
+            ],
+          }
+          return
+        }
+      }
+    }
+
+    // Stop hooks yield progress/attachment messages AFTER the assistant
+    // response (via yield* handleStopHooks in query.ts). Since #23537 pushes
+    // those to `messages` inline, last(messages) can be a progress/attachment
+    // instead of the assistant — which makes textResult extraction below
+    // return '' and -p mode emit a blank line. Allowlist to assistant|user:
+    // isResultSuccessful handles both (user with all tool_result blocks is a
+    // valid successful terminal state).
+    const result = messages.findLast(
+      m => m.type === 'assistant' || m.type === 'user',
+    )
+    // Capture for the error_during_execution diagnostic — isResultSuccessful
+    // is a type predicate (message is Message), so inside the false branch
+    // `result` narrows to never and these accesses don't typecheck.
+    const edeResultType = result?.type ?? 'undefined'
+    const edeLastContentType =
+      result?.type === 'assistant'
+        ? (last(result.message.content)?.type ?? 'none')
+        : 'n/a'
+
+    // Flush buffered transcript writes before yielding result.
+    // The desktop app kills the CLI process immediately after receiving the
+    // result message, so any unflushed writes would be lost.
+    if (persistSession) {
+      if (
+        isEnvTruthy(process.env.CLAUDE_CODE_EAGER_FLUSH) ||
+        isEnvTruthy(process.env.CLAUDE_CODE_IS_COWORK)
+      ) {
+        await flushSessionStorage()
+      }
+    }
+
+    if (!isResultSuccessful(result, lastStopReason)) {
+      yield {
+        type: 'result',
+        subtype: 'error_during_execution',
+        duration_ms: Date.now() - startTime,
+        duration_api_ms: getTotalAPIDuration(),
+        is_error: true,
+        num_turns: turnCount,
+        stop_reason: lastStopReason,
+        session_id: getSessionId(),
+        total_cost_usd: getTotalCost(),
+        usage: this.totalUsage,
+        modelUsage: getModelUsage(),
+        permission_denials: this.permissionDenials,
+        fast_mode_state: getFastModeState(
+          mainLoopModel,
+          initialAppState.fastMode,
+        ),
+        uuid: randomUUID(),
+        // Diagnostic prefix: these are what isResultSuccessful() checks — if
+        // the result type isn't assistant-with-text/thinking or user-with-
+        // tool_result, and stop_reason isn't end_turn, that's why this fired.
+        // errors[] is turn-scoped via the watermark; previously it dumped the
+        // entire process's logError buffer (ripgrep timeouts, ENOENT, etc).
+        errors: (() => {
+          const all = getInMemoryErrors()
+          const start = errorLogWatermark
+            ? all.lastIndexOf(errorLogWatermark) + 1
+            : 0
+          return [
+            `[ede_diagnostic] result_type=${edeResultType} last_content_type=${edeLastContentType} stop_reason=${lastStopReason}`,
+            ...all.slice(start).map(_ => _.error),
+          ]
+        })(),
+      }
+      return
+    }
+
+    // Extract the text result based on message type
+    let textResult = ''
+    let isApiError = false
+
+    if (result.type === 'assistant') {
+      const lastContent = last(result.message.content)
+      if (
+        lastContent?.type === 'text' &&
+        !SYNTHETIC_MESSAGES.has(lastContent.text)
+      ) {
+        textResult = lastContent.text
+      }
+      isApiError = Boolean(result.isApiErrorMessage)
+    }
+
+    yield {
+      type: 'result',
+      subtype: 'success',
+      is_error: isApiError,
+      duration_ms: Date.now() - startTime,
+      duration_api_ms: getTotalAPIDuration(),
+      num_turns: turnCount,
+      result: textResult,
+      stop_reason: lastStopReason,
+      session_id: getSessionId(),
+      total_cost_usd: getTotalCost(),
+      usage: this.totalUsage,
+      modelUsage: getModelUsage(),
+      permission_denials: this.permissionDenials,
+      structured_output: structuredOutputFromTool,
+      fast_mode_state: getFastModeState(
+        mainLoopModel,
+        initialAppState.fastMode,
+      ),
+      uuid: randomUUID(),
+    }
+  }
+
  /** Abort the engine's AbortController, cancelling any in-flight query work. */
  interrupt(): void {
    this.abortController.abort()
  }
+
  /**
   * Expose the engine's message list. Returns the live internal array
   * (typed readonly), not a defensive copy — callers must not mutate it.
   */
  getMessages(): readonly Message[] {
    return this.mutableMessages
  }
+
  /**
   * Expose the engine's read-file cache so callers can persist it back
   * (ask() does this in its finally block).
   */
  getReadFileState(): FileStateCache {
    return this.readFileState
  }
+
  /** Current session ID; delegates to the module-level getSessionId(). */
  getSessionId(): string {
    return getSessionId()
  }
+
  /** Override the user-specified model in the engine config for subsequent queries. */
  setModel(model: string): void {
    this.config.userSpecifiedModel = model
  }
+}
+
/**
 * Sends a single prompt to the Claude API and returns the response.
 * Assumes that claude is being used non-interactively -- will not
 * ask the user for permissions or further input.
 *
 * Convenience wrapper around QueryEngine for one-shot usage: builds an
 * engine from the given options, streams submitMessage(), and writes the
 * engine's read-file cache back to the caller when the stream ends.
 */
export async function* ask({
  commands,
  prompt,
  promptUuid,
  isMeta,
  cwd,
  tools,
  mcpClients,
  verbose = false,
  thinkingConfig,
  maxTurns,
  maxBudgetUsd,
  taskBudget,
  canUseTool,
  mutableMessages = [],
  getReadFileCache,
  setReadFileCache,
  customSystemPrompt,
  appendSystemPrompt,
  userSpecifiedModel,
  fallbackModel,
  jsonSchema,
  getAppState,
  setAppState,
  abortController,
  replayUserMessages = false,
  includePartialMessages = false,
  handleElicitation,
  agents = [],
  setSDKStatus,
  orphanedPermission,
}: {
  commands: Command[]
  prompt: string | Array<ContentBlockParam>
  promptUuid?: string
  isMeta?: boolean
  cwd: string
  tools: Tools
  verbose?: boolean
  mcpClients: MCPServerConnection[]
  thinkingConfig?: ThinkingConfig
  maxTurns?: number
  maxBudgetUsd?: number
  taskBudget?: { total: number }
  canUseTool: CanUseToolFn
  mutableMessages?: Message[]
  customSystemPrompt?: string
  appendSystemPrompt?: string
  userSpecifiedModel?: string
  fallbackModel?: string
  jsonSchema?: Record<string, unknown>
  getAppState: () => AppState
  setAppState: (f: (prev: AppState) => AppState) => void
  getReadFileCache: () => FileStateCache
  setReadFileCache: (cache: FileStateCache) => void
  abortController?: AbortController
  replayUserMessages?: boolean
  includePartialMessages?: boolean
  handleElicitation?: ToolUseContext['handleElicitation']
  agents?: AgentDefinition[]
  setSDKStatus?: (status: SDKStatus) => void
  orphanedPermission?: OrphanedPermission
}): AsyncGenerator<SDKMessage, void, unknown> {
  const engine = new QueryEngine({
    cwd,
    tools,
    commands,
    mcpClients,
    agents,
    canUseTool,
    getAppState,
    setAppState,
    initialMessages: mutableMessages,
    // Clone so the engine mutates its own copy; the caller's cache is only
    // updated via the setReadFileCache call in the finally block below.
    readFileCache: cloneFileStateCache(getReadFileCache()),
    customSystemPrompt,
    appendSystemPrompt,
    userSpecifiedModel,
    fallbackModel,
    thinkingConfig,
    maxTurns,
    maxBudgetUsd,
    taskBudget,
    jsonSchema,
    verbose,
    handleElicitation,
    replayUserMessages,
    includePartialMessages,
    setSDKStatus,
    abortController,
    orphanedPermission,
    // NOTE(review): snipProjection/snipModule are non-null-asserted —
    // presumably guaranteed to be loaded whenever the HISTORY_SNIP feature
    // flag is on; confirm at the module that initializes them.
    ...(feature('HISTORY_SNIP')
      ? {
          snipReplay: (yielded: Message, store: Message[]) => {
            if (!snipProjection!.isSnipBoundaryMessage(yielded))
              return undefined
            return snipModule!.snipCompactIfNeeded(store, { force: true })
          },
        }
      : {}),
  })

  try {
    yield* engine.submitMessage(prompt, {
      uuid: promptUuid,
      isMeta,
    })
  } finally {
    // Persist the engine's read-file cache back to the caller even when the
    // generator throws or is interrupted mid-stream.
    setReadFileCache(engine.getReadFileState())
  }
}

+ 125 - 0
src/Task.ts

@@ -0,0 +1,125 @@
+import { randomBytes } from 'crypto'
+import type { AppState } from './state/AppState.js'
+import type { AgentId } from './types/ids.js'
+import { getTaskOutputPath } from './utils/task/diskOutput.js'
+
/** Kind of background task; drives ID prefixing (see TASK_ID_PREFIXES)
 *  and kill dispatch (see the Task type below). */
export type TaskType =
  | 'local_bash'
  | 'local_agent'
  | 'remote_agent'
  | 'in_process_teammate'
  | 'local_workflow'
  | 'monitor_mcp'
  | 'dream'

/** Task lifecycle state. 'completed' | 'failed' | 'killed' are terminal
 *  (see isTerminalTaskStatus); 'pending' | 'running' can still transition. */
export type TaskStatus =
  | 'pending'
  | 'running'
  | 'completed'
  | 'failed'
  | 'killed'
+
+/**
+ * True when a task is in a terminal state and will not transition further.
+ * Used to guard against injecting messages into dead teammates, evicting
+ * finished tasks from AppState, and orphan-cleanup paths.
+ */
+export function isTerminalTaskStatus(status: TaskStatus): boolean {
+  return status === 'completed' || status === 'failed' || status === 'killed'
+}
+
/** Handle returned for a spawned task. */
export type TaskHandle = {
  taskId: string
  // Optional teardown hook — NOTE(review): invocation timing is not
  // visible in this file; confirm at call sites.
  cleanup?: () => void
}

/** Updater-style AppState setter: receives the previous state, returns the next. */
export type SetAppState = (f: (prev: AppState) => AppState) => void

/** Dependencies handed to a running task implementation. */
export type TaskContext = {
  abortController: AbortController
  getAppState: () => AppState
  setAppState: SetAppState
}

// Base fields shared by all task states
export type TaskStateBase = {
  id: string // e.g. from generateTaskId(): one-char type prefix + 8 random chars
  type: TaskType
  status: TaskStatus
  description: string
  toolUseId?: string
  startTime: number // epoch ms (Date.now() in createTaskStateBase)
  endTime?: number // epoch ms — presumably set on reaching a terminal status; confirm
  totalPausedMs?: number // NOTE(review): semantics not visible in this file
  outputFile: string // on-disk output path derived from id via getTaskOutputPath
  outputOffset: number // starts at 0; presumably output bytes already consumed — confirm
  notified: boolean // starts false; presumably flipped once completion is surfaced — confirm
}
+
/** Spawn parameters for a local shell task. */
export type LocalShellSpawnInput = {
  command: string
  description: string
  timeout?: number // presumably ms — confirm against the bash task implementation
  toolUseId?: string
  agentId?: AgentId
  /** UI display variant: description-as-label, dialog title, status bar pill. */
  kind?: 'bash' | 'monitor'
}

// What getTaskByType dispatches for: kill. spawn/render were never
// called polymorphically (removed in #22546). All six kill implementations
// use only setAppState — getAppState/abortController were dead weight.
export type Task = {
  name: string
  type: TaskType
  kill(taskId: string, setAppState: SetAppState): Promise<void>
}
+
+// Task ID prefixes
+const TASK_ID_PREFIXES: Record<string, string> = {
+  local_bash: 'b', // Keep as 'b' for backward compatibility
+  local_agent: 'a',
+  remote_agent: 'r',
+  in_process_teammate: 't',
+  local_workflow: 'w',
+  monitor_mcp: 'm',
+  dream: 'd',
+}
+
+// Get task ID prefix
+function getTaskIdPrefix(type: TaskType): string {
+  return TASK_ID_PREFIXES[type] ?? 'x'
+}
+
+// Case-insensitive-safe alphabet (digits + lowercase) for task IDs.
+// 36^8 ≈ 2.8 trillion combinations, sufficient to resist brute-force symlink attacks.
+const TASK_ID_ALPHABET = '0123456789abcdefghijklmnopqrstuvwxyz'
+
+export function generateTaskId(type: TaskType): string {
+  const prefix = getTaskIdPrefix(type)
+  const bytes = randomBytes(8)
+  let id = prefix
+  for (let i = 0; i < 8; i++) {
+    id += TASK_ID_ALPHABET[bytes[i]! % TASK_ID_ALPHABET.length]
+  }
+  return id
+}
+
+export function createTaskStateBase(
+  id: string,
+  type: TaskType,
+  description: string,
+  toolUseId?: string,
+): TaskStateBase {
+  return {
+    id,
+    type,
+    status: 'pending',
+    description,
+    toolUseId,
+    startTime: Date.now(),
+    outputFile: getTaskOutputPath(id),
+    outputOffset: 0,
+    notified: false,
+  }
+}

+ 792 - 0
src/Tool.ts

@@ -0,0 +1,792 @@
+import type {
+  ToolResultBlockParam,
+  ToolUseBlockParam,
+} from '@anthropic-ai/sdk/resources/index.mjs'
+import type {
+  ElicitRequestURLParams,
+  ElicitResult,
+} from '@modelcontextprotocol/sdk/types.js'
+import type { UUID } from 'crypto'
+import type { z } from 'zod/v4'
+import type { Command } from './commands.js'
+import type { CanUseToolFn } from './hooks/useCanUseTool.js'
+import type { ThinkingConfig } from './utils/thinking.js'
+
/**
 * JSON Schema describing a tool's input: the root must be type 'object';
 * other schema keywords pass through via the index signature.
 */
export type ToolInputJSONSchema = {
  [x: string]: unknown
  type: 'object'
  properties?: {
    [x: string]: unknown
  }
}
+
+import type { Notification } from './context/notifications.js'
+import type {
+  MCPServerConnection,
+  ServerResource,
+} from './services/mcp/types.js'
+import type {
+  AgentDefinition,
+  AgentDefinitionsResult,
+} from './tools/AgentTool/loadAgentsDir.js'
+import type {
+  AssistantMessage,
+  AttachmentMessage,
+  Message,
+  ProgressMessage,
+  SystemLocalCommandMessage,
+  SystemMessage,
+  UserMessage,
+} from './types/message.js'
+// Import permission types from centralized location to break import cycles
+// Import PermissionResult from centralized location to break import cycles
+import type {
+  AdditionalWorkingDirectory,
+  PermissionMode,
+  PermissionResult,
+} from './types/permissions.js'
+// Import tool progress types from centralized location to break import cycles
+import type {
+  AgentToolProgress,
+  BashProgress,
+  MCPProgress,
+  REPLToolProgress,
+  SkillToolProgress,
+  TaskOutputProgress,
+  ToolProgressData,
+  WebSearchProgress,
+} from './types/tools.js'
+import type { FileStateCache } from './utils/fileStateCache.js'
+import type { DenialTrackingState } from './utils/permissions/denialTracking.js'
+import type { SystemPrompt } from './utils/systemPromptType.js'
+import type { ContentReplacementState } from './utils/toolResultStorage.js'
+
+// Re-export progress types for backwards compatibility
+export type {
+  AgentToolProgress,
+  BashProgress,
+  MCPProgress,
+  REPLToolProgress,
+  SkillToolProgress,
+  TaskOutputProgress,
+  WebSearchProgress,
+}
+
+import type { SpinnerMode } from './components/Spinner.js'
+import type { QuerySource } from './constants/querySource.js'
+import type { SDKStatus } from './entrypoints/agentSdkTypes.js'
+import type { AppState } from './state/AppState.js'
+import type {
+  HookProgress,
+  PromptRequest,
+  PromptResponse,
+} from './types/hooks.js'
+import type { AgentId } from './types/ids.js'
+import type { DeepImmutable } from './types/utils.js'
+import type { AttributionState } from './utils/commitAttribution.js'
+import type { FileHistoryState } from './utils/fileHistory.js'
+import type { Theme, ThemeName } from './utils/theme.js'
+
// Parent/child query chain tracking — NOTE(review): chainId is presumably
// shared across the chain with depth as the nesting level; confirm at call sites.
export type QueryChainTracking = {
  chainId: string
  depth: number
}

/** Outcome of validating a tool's input: ok, or a message plus numeric error code. */
export type ValidationResult =
  | { result: true }
  | {
      result: false
      message: string
      errorCode: number
    }

/** Sets tool-rendered JSX in the UI; a null args value presumably clears it. */
export type SetToolJSXFn = (
  args: {
    jsx: React.ReactNode | null
    shouldHidePromptInput: boolean
    shouldContinueAnimation?: true
    showSpinner?: boolean
    isLocalJSXCommand?: boolean
    isImmediate?: boolean
    /** Set to true to clear a local JSX command (e.g., from its onDone callback) */
    clearLocalJSX?: boolean
  } | null,
) => void
+
+// Import tool permission types from centralized location to break import cycles
+import type { ToolPermissionRulesBySource } from './types/permissions.js'
+
+// Re-export for backwards compatibility
+export type { ToolPermissionRulesBySource }
+
// Apply DeepImmutable to the imported type
/** Immutable snapshot of the session's tool-permission state. */
export type ToolPermissionContext = DeepImmutable<{
  mode: PermissionMode
  additionalWorkingDirectories: Map<string, AdditionalWorkingDirectory>
  // Rule sets grouped by source; precedence among allow/deny/ask is
  // decided by the permission evaluator, not encoded in this type.
  alwaysAllowRules: ToolPermissionRulesBySource
  alwaysDenyRules: ToolPermissionRulesBySource
  alwaysAskRules: ToolPermissionRulesBySource
  isBypassPermissionsModeAvailable: boolean
  isAutoModeAvailable?: boolean
  strippedDangerousRules?: ToolPermissionRulesBySource
  /** When true, permission prompts are auto-denied (e.g., background agents that can't show UI) */
  shouldAvoidPermissionPrompts?: boolean
  /** When true, automated checks (classifier, hooks) are awaited before showing the permission dialog (coordinator workers) */
  awaitAutomatedChecksBeforeDialog?: boolean
  /** Stores the permission mode before model-initiated plan mode entry, so it can be restored on exit */
  prePlanMode?: PermissionMode
}>
+
+export const getEmptyToolPermissionContext: () => ToolPermissionContext =
+  () => ({
+    mode: 'default',
+    additionalWorkingDirectories: new Map(),
+    alwaysAllowRules: {},
+    alwaysDenyRules: {},
+    alwaysAskRules: {},
+    isBypassPermissionsModeAvailable: false,
+  })
+
/** Progress events emitted around conversation compaction: hooks_start
 *  identifies the hook phase being run; compact_start/compact_end
 *  presumably bracket the compaction work itself — confirm at the emitter. */
export type CompactProgressEvent =
  | {
      type: 'hooks_start'
      hookType: 'pre_compact' | 'post_compact' | 'session_start'
    }
  | { type: 'compact_start' }
  | { type: 'compact_end' }
+
/**
 * Per-query execution context threaded through every tool call. Bundles the
 * query options, abort signal, app-state accessors, and the optional UI /
 * telemetry hooks a host (REPL, SDK, subagent runner) may wire in. Optional
 * members are only set in the contexts their doc comments describe.
 */
export type ToolUseContext = {
  /** Options captured for this query. */
  options: {
    commands: Command[]
    debug: boolean
    /** Model identifier used by the main conversation loop. */
    mainLoopModel: string
    tools: Tools
    verbose: boolean
    thinkingConfig: ThinkingConfig
    mcpClients: MCPServerConnection[]
    mcpResources: Record<string, ServerResource[]>
    isNonInteractiveSession: boolean
    agentDefinitions: AgentDefinitionsResult
    /** Spend cap in USD — NOTE(review): enforcement point not visible here. */
    maxBudgetUsd?: number
    /** Custom system prompt that replaces the default system prompt */
    customSystemPrompt?: string
    /** Additional system prompt appended after the main system prompt */
    appendSystemPrompt?: string
    /** Override querySource for analytics tracking */
    querySource?: QuerySource
    /** Optional callback to get the latest tools (e.g., after MCP servers connect mid-query) */
    refreshTools?: () => Tools
  }
  /** Cancellation source for this query and its in-flight tool calls. */
  abortController: AbortController
  /** Cache of file-read state. An LRU — entries can be evicted in busy
   *  sessions (see loadedNestedMemoryPaths below). */
  readFileState: FileStateCache
  /** Read the current app state. */
  getAppState(): AppState
  /** Update app state. No-op for async agents (see setAppStateForTasks). */
  setAppState(f: (prev: AppState) => AppState): void
  /**
   * Always-shared setAppState for session-scoped infrastructure (background
   * tasks, session hooks). Unlike setAppState, which is no-op for async agents
   * (see createSubagentContext), this always reaches the root store so agents
   * at any nesting depth can register/clean up infrastructure that outlives
   * a single turn. Only set by createSubagentContext; main-thread contexts
   * fall back to setAppState.
   */
  setAppStateForTasks?: (f: (prev: AppState) => AppState) => void
  /**
   * Optional handler for URL elicitations triggered by tool call errors (-32042).
   * In print/SDK mode, this delegates to structuredIO.handleElicitation.
   * In REPL mode, this is undefined and the queue-based UI path is used.
   */
  handleElicitation?: (
    serverName: string,
    params: ElicitRequestURLParams,
    signal: AbortSignal,
  ) => Promise<ElicitResult>
  setToolJSX?: SetToolJSXFn
  addNotification?: (notif: Notification) => void
  /** Append a UI-only system message to the REPL message list. Stripped at the
   *  normalizeMessagesForAPI boundary — the Exclude<> makes that type-enforced. */
  appendSystemMessage?: (
    msg: Exclude<SystemMessage, SystemLocalCommandMessage>,
  ) => void
  /** Send an OS-level notification (iTerm2, Kitty, Ghostty, bell, etc.) */
  sendOSNotification?: (opts: {
    message: string
    notificationType: string
  }) => void
  nestedMemoryAttachmentTriggers?: Set<string>
  /**
   * CLAUDE.md paths already injected as nested_memory attachments this
   * session. Dedup for memoryFilesToAttachments — readFileState is an LRU
   * that evicts entries in busy sessions, so its .has() check alone can
   * re-inject the same CLAUDE.md dozens of times.
   */
  loadedNestedMemoryPaths?: Set<string>
  dynamicSkillDirTriggers?: Set<string>
  /** Skill names surfaced via skill_discovery this session. Telemetry only (feeds was_discovered). */
  discoveredSkillNames?: Set<string>
  userModified?: boolean
  setInProgressToolUseIDs: (f: (prev: Set<string>) => Set<string>) => void
  /** Only wired in interactive (REPL) contexts; SDK/QueryEngine don't set this. */
  setHasInterruptibleToolInProgress?: (v: boolean) => void
  setResponseLength: (f: (prev: number) => number) => void
  /** Ant-only: push a new API metrics entry for OTPS tracking.
   *  Called by subagent streaming when a new API request starts. */
  pushApiMetricsEntry?: (ttftMs: number) => void
  setStreamMode?: (mode: SpinnerMode) => void
  onCompactProgress?: (event: CompactProgressEvent) => void
  setSDKStatus?: (status: SDKStatus) => void
  openMessageSelector?: () => void
  updateFileHistoryState: (
    updater: (prev: FileHistoryState) => FileHistoryState,
  ) => void
  updateAttributionState: (
    updater: (prev: AttributionState) => AttributionState,
  ) => void
  setConversationId?: (id: UUID) => void
  agentId?: AgentId // Only set for subagents; use getSessionId() for session ID. Hooks use this to distinguish subagent calls.
  agentType?: string // Subagent type name. For the main thread's --agent type, hooks fall back to getMainThreadAgentType().
  /** When true, canUseTool must always be called even when hooks auto-approve.
   *  Used by speculation for overlay file path rewriting. */
  requireCanUseTool?: boolean
  /** Messages for this conversation thread — NOTE(review): whether this is a
   *  snapshot or a live reference is not visible here; confirm at call sites. */
  messages: Message[]
  fileReadingLimits?: {
    maxTokens?: number
    maxSizeBytes?: number
  }
  globLimits?: {
    maxResults?: number
  }
  /** Per-tool-use accept/reject decisions, keyed by id — NOTE(review): key
   *  shape inferred from usage elsewhere; confirm. */
  toolDecisions?: Map<
    string,
    {
      source: string
      decision: 'accept' | 'reject'
      timestamp: number
    }
  >
  queryTracking?: QueryChainTracking
  /** Callback factory for requesting interactive prompts from the user.
   * Returns a prompt callback bound to the given source name.
   * Only available in interactive (REPL) contexts. */
  requestPrompt?: (
    sourceName: string,
    toolInputSummary?: string | null,
  ) => (request: PromptRequest) => Promise<PromptResponse>
  toolUseId?: string
  criticalSystemReminder_EXPERIMENTAL?: string
  /** When true, preserve toolUseResult on messages even for subagents.
   * Used by in-process teammates whose transcripts are viewable by the user. */
  preserveToolUseResults?: boolean
  /** Local denial tracking state for async subagents whose setAppState is a
   *  no-op. Without this, the denial counter never accumulates and the
   *  fallback-to-prompting threshold is never reached. Mutable — the
   *  permissions code updates it in place. */
  localDenialTracking?: DenialTrackingState
  /**
   * Per-conversation-thread content replacement state for the tool result
   * budget. When present, query.ts applies the aggregate tool result budget.
   * Main thread: REPL provisions once (never resets — stale UUID keys
   * are inert). Subagents: createSubagentContext clones the parent's state
   * by default (cache-sharing forks need identical decisions), or
   * resumeAgentBackground threads one reconstructed from sidechain records.
   */
  contentReplacementState?: ContentReplacementState
  /**
   * Parent's rendered system prompt bytes, frozen at turn start.
   * Used by fork subagents to share the parent's prompt cache — re-calling
   * getSystemPrompt() at fork-spawn time can diverge (GrowthBook cold→warm)
   * and bust the cache. See forkSubagent.ts.
   */
  renderedSystemPrompt?: SystemPrompt
}
+
// Re-export ToolProgressData from centralized location
export type { ToolProgressData }

/** Any progress payload: tool-emitted data or hook progress. */
export type Progress = ToolProgressData | HookProgress

/** A single progress update, tagged with the tool_use id it belongs to. */
export type ToolProgress<P extends ToolProgressData> = {
  toolUseID: string
  data: P
}
+
+export function filterToolProgressMessages(
+  progressMessagesForMessage: ProgressMessage[],
+): ProgressMessage<ToolProgressData>[] {
+  return progressMessagesForMessage.filter(
+    (msg): msg is ProgressMessage<ToolProgressData> =>
+      msg.data?.type !== 'hook_progress',
+  )
+}
+
/** Result payload a tool's `call` resolves with. */
export type ToolResult<T> = {
  /** The tool's typed output. */
  data: T
  /** Optional messages produced alongside the result — NOTE(review):
   *  presumably appended to the conversation; confirm in the query loop. */
  newMessages?: (
    | UserMessage
    | AssistantMessage
    | AttachmentMessage
    | SystemMessage
  )[]
  // contextModifier is only honored for tools that aren't concurrency safe.
  contextModifier?: (context: ToolUseContext) => ToolUseContext
  /** MCP protocol metadata (structuredContent, _meta) to pass through to SDK consumers */
  mcpMeta?: {
    _meta?: Record<string, unknown>
    structuredContent?: Record<string, unknown>
  }
}

/** Callback a tool invokes to stream progress updates while it runs. */
export type ToolCallProgress<P extends ToolProgressData = ToolProgressData> = (
  progress: ToolProgress<P>,
) => void

// Type for any schema that outputs an object with string keys
export type AnyObject = z.ZodType<{ [key: string]: unknown }>
+
+/**
+ * Checks if a tool matches the given name (primary name or alias).
+ */
+export function toolMatchesName(
+  tool: { name: string; aliases?: string[] },
+  name: string,
+): boolean {
+  return tool.name === name || (tool.aliases?.includes(name) ?? false)
+}
+
+/**
+ * Finds a tool by name or alias from a list of tools.
+ */
+export function findToolByName(tools: Tools, name: string): Tool | undefined {
+  return tools.find(t => toolMatchesName(t, name))
+}
+
/**
 * The contract every tool implements: input schema, permission/validation
 * hooks, the `call` implementation, and the render* surface used by the
 * REPL. Tool modules should construct instances via `buildTool`, which
 * fills in defaults for the commonly-stubbed methods.
 */
export type Tool<
  Input extends AnyObject = AnyObject,
  Output = unknown,
  P extends ToolProgressData = ToolProgressData,
> = {
  /**
   * Optional aliases for backwards compatibility when a tool is renamed.
   * The tool can be looked up by any of these names in addition to its primary name.
   */
  aliases?: string[]
  /**
   * One-line capability phrase used by ToolSearch for keyword matching.
   * Helps the model find this tool via keyword search when it's deferred.
   * 3–10 words, no trailing period.
   * Prefer terms not already in the tool name (e.g. 'jupyter' for NotebookEdit).
   */
  searchHint?: string
  /** Execute the tool with parsed input; resolves with the typed result
   *  (plus optional extra messages — see ToolResult). */
  call(
    args: z.infer<Input>,
    context: ToolUseContext,
    canUseTool: CanUseToolFn,
    parentMessage: AssistantMessage,
    onProgress?: ToolCallProgress<P>,
  ): Promise<ToolResult<Output>>
  /** Description of this tool use — NOTE(review): whether the consumer is
   *  the model or the UI is not visible here; confirm at call sites. */
  description(
    input: z.infer<Input>,
    options: {
      isNonInteractiveSession: boolean
      toolPermissionContext: ToolPermissionContext
      tools: Tools
    },
  ): Promise<string>
  readonly inputSchema: Input
  // Type for MCP tools that can specify their input schema directly in JSON Schema format
  // rather than converting from Zod schema
  readonly inputJSONSchema?: ToolInputJSONSchema
  // Optional because TungstenTool doesn't define this. TODO: Make it required.
  // When we do that, we can also go through and make this a bit more type-safe.
  outputSchema?: z.ZodType<unknown>
  // Optional: structural equivalence check between two inputs
  // (consumer not visible in this file).
  inputsEquivalent?(a: z.infer<Input>, b: z.infer<Input>): boolean
  /** Whether this call may run concurrently with other tool calls.
   *  buildTool default: false (assume not safe). */
  isConcurrencySafe(input: z.infer<Input>): boolean
  /** Whether the tool is available at all. buildTool default: true. */
  isEnabled(): boolean
  /** Whether this input performs no writes. buildTool default: false. */
  isReadOnly(input: z.infer<Input>): boolean
  /** Defaults to false. Only set when the tool performs irreversible operations (delete, overwrite, send). */
  isDestructive?(input: z.infer<Input>): boolean
  /**
   * What should happen when the user submits a new message while this tool
   * is running.
   *
   * - `'cancel'` — stop the tool and discard its result
   * - `'block'`  — keep running; the new message waits
   *
   * Defaults to `'block'` when not implemented.
   */
  interruptBehavior?(): 'cancel' | 'block'
  /**
   * Returns information about whether this tool use is a search or read operation
   * that should be collapsed into a condensed display in the UI. Examples include
   * file searching (Grep, Glob), file reading (Read), and bash commands like find,
   * grep, wc, etc.
   *
   * Returns an object indicating whether the operation is a search or read operation:
   * - `isSearch: true` for search operations (grep, find, glob patterns)
   * - `isRead: true` for read operations (cat, head, tail, file read)
   * - `isList: true` for directory-listing operations (ls, tree, du)
   * - All can be false if the operation shouldn't be collapsed
   */
  isSearchOrReadCommand?(input: z.infer<Input>): {
    isSearch: boolean
    isRead: boolean
    isList?: boolean
  }
  // NOTE(review): semantics not visible here — presumably marks tool uses
  // that reach outside the workspace (network, external state); confirm.
  isOpenWorld?(input: z.infer<Input>): boolean
  requiresUserInteraction?(): boolean
  // Set on MCP-server-backed tools (see mcpInfo below).
  isMcp?: boolean
  // Set on LSP-provided tools — by name only; confirm where it is consumed.
  isLsp?: boolean
  /**
   * When true, this tool is deferred (sent with defer_loading: true) and requires
   * ToolSearch to be used before it can be called.
   */
  readonly shouldDefer?: boolean
  /**
   * When true, this tool is never deferred — its full schema appears in the
   * initial prompt even when ToolSearch is enabled. For MCP tools, set via
   * `_meta['anthropic/alwaysLoad']`. Use for tools the model must see on
   * turn 1 without a ToolSearch round-trip.
   */
  readonly alwaysLoad?: boolean
  /**
   * For MCP tools: the server and tool names as received from the MCP server (unnormalized).
   * Present on all MCP tools regardless of whether `name` is prefixed (mcp__server__tool)
   * or unprefixed (CLAUDE_AGENT_SDK_MCP_NO_PREFIX mode).
   */
  mcpInfo?: { serverName: string; toolName: string }
  readonly name: string
  /**
   * Maximum size in characters for tool result before it gets persisted to disk.
   * When exceeded, the result is saved to a file and Claude receives a preview
   * with the file path instead of the full content.
   *
   * Set to Infinity for tools whose output must never be persisted (e.g. Read,
   * where persisting creates a circular Read→file→Read loop and the tool
   * already self-bounds via its own limits).
   */
  maxResultSizeChars: number
  /**
   * When true, enables strict mode for this tool, which causes the API to
   * more strictly adhere to tool instructions and parameter schemas.
   * Only applied when the tengu_tool_pear is enabled.
   */
  readonly strict?: boolean

  /**
   * Called on copies of tool_use input before observers see it (SDK stream,
   * transcript, canUseTool, PreToolUse/PostToolUse hooks). Mutate in place
   * to add legacy/derived fields. Must be idempotent. The original API-bound
   * input is never mutated (preserves prompt cache). Not re-applied when a
   * hook/permission returns a fresh updatedInput — those own their shape.
   */
  backfillObservableInput?(input: Record<string, unknown>): void

  /**
   * Determines if this tool is allowed to run with this input in the current context.
   * It informs the model of why the tool use failed, and does not directly display any UI.
   * @param input
   * @param context
   */
  validateInput?(
    input: z.infer<Input>,
    context: ToolUseContext,
  ): Promise<ValidationResult>

  /**
   * Determines if the user is asked for permission. Only called after validateInput() passes.
   * General permission logic is in permissions.ts. This method contains tool-specific logic.
   * @param input
   * @param context
   */
  checkPermissions(
    input: z.infer<Input>,
    context: ToolUseContext,
  ): Promise<PermissionResult>

  // Optional method for tools that operate on a file path
  getPath?(input: z.infer<Input>): string

  /**
   * Prepare a matcher for hook `if` conditions (permission-rule patterns like
   * "git *" from "Bash(git *)"). Called once per hook-input pair; any
   * expensive parsing happens here. Returns a closure that is called per
   * hook pattern. If not implemented, only tool-name-level matching works.
   */
  preparePermissionMatcher?(
    input: z.infer<Input>,
  ): Promise<(pattern: string) => boolean>

  /** Full prompt text for this tool — NOTE(review): presumably injected into
   *  the system prompt; confirm where it is assembled. */
  prompt(options: {
    getToolPermissionContext: () => Promise<ToolPermissionContext>
    tools: Tools
    agents: AgentDefinition[]
    allowedAgentTypes?: string[]
  }): Promise<string>
  /** Display name for the UI; buildTool defaults this to `name`. */
  userFacingName(input: Partial<z.infer<Input>> | undefined): string
  userFacingNameBackgroundColor?(
    input: Partial<z.infer<Input>> | undefined,
  ): keyof Theme | undefined
  /**
   * Transparent wrappers (e.g. REPL) delegate all rendering to their progress
   * handler, which emits native-looking blocks for each inner tool call.
   * The wrapper itself shows nothing.
   */
  isTransparentWrapper?(): boolean
  /**
   * Returns a short string summary of this tool use for display in compact views.
   * @param input The tool input
   * @returns A short string summary, or null to not display
   */
  getToolUseSummary?(input: Partial<z.infer<Input>> | undefined): string | null
  /**
   * Returns a human-readable present-tense activity description for spinner display.
   * Example: "Reading src/foo.ts", "Running bun test", "Searching for pattern"
   * @param input The tool input
   * @returns Activity description string, or null to fall back to tool name
   */
  getActivityDescription?(
    input: Partial<z.infer<Input>> | undefined,
  ): string | null
  /**
   * Returns a compact representation of this tool use for the auto-mode
   * security classifier. Examples: `ls -la` for Bash, `/tmp/x: new content`
   * for Edit. Return '' to skip this tool in the classifier transcript
   * (e.g. tools with no security relevance). May return an object to avoid
   * double-encoding when the caller JSON-wraps the value.
   */
  toAutoClassifierInput(input: z.infer<Input>): unknown
  /** Serializes the tool output into the model-facing tool_result block. */
  mapToolResultToToolResultBlockParam(
    content: Output,
    toolUseID: string,
  ): ToolResultBlockParam
  /**
   * Optional. When omitted, the tool result renders nothing (same as returning
   * null). Omit for tools whose results are surfaced elsewhere (e.g., TodoWrite
   * updates the todo panel, not the transcript).
   */
  renderToolResultMessage?(
    content: Output,
    progressMessagesForMessage: ProgressMessage<P>[],
    options: {
      style?: 'condensed'
      theme: ThemeName
      tools: Tools
      verbose: boolean
      isTranscriptMode?: boolean
      isBriefOnly?: boolean
      /** Original tool_use input, when available. Useful for compact result
       * summaries that reference what was requested (e.g. "Sent to #foo"). */
      input?: unknown
    },
  ): React.ReactNode
  /**
   * Flattened text of what renderToolResultMessage shows IN TRANSCRIPT
   * MODE (verbose=true, isTranscriptMode=true). For transcript search
   * indexing: the index counts occurrences in this string, the highlight
   * overlay scans the actual screen buffer. For count ≡ highlight, this
   * must return the text that ends up visible — not the model-facing
   * serialization from mapToolResultToToolResultBlockParam (which adds
   * system-reminders, persisted-output wrappers).
   *
   * Chrome can be skipped (under-count is fine). "Found 3 files in 12ms"
   * isn't worth indexing. Phantoms are not fine — text that's claimed
   * here but doesn't render is a count≠highlight bug.
   *
   * Optional: omitted → field-name heuristic in transcriptSearch.ts.
   * Drift caught by test/utils/transcriptSearch.renderFidelity.test.tsx
   * which renders sample outputs and flags text that's indexed-but-not-
   * rendered (phantom) or rendered-but-not-indexed (under-count warning).
   */
  extractSearchText?(out: Output): string
  /**
   * Render the tool use message. Note that `input` is partial because we render
   * the message as soon as possible, possibly before tool parameters have fully
   * streamed in.
   */
  renderToolUseMessage(
    input: Partial<z.infer<Input>>,
    options: { theme: ThemeName; verbose: boolean; commands?: Command[] },
  ): React.ReactNode
  /**
   * Returns true when the non-verbose rendering of this output is truncated
   * (i.e., clicking to expand would reveal more content). Gates
   * click-to-expand in fullscreen — only messages where verbose actually
   * shows more get a hover/click affordance. Unset means never truncated.
   */
  isResultTruncated?(output: Output): boolean
  /**
   * Renders an optional tag to display after the tool use message.
   * Used for additional metadata like timeout, model, resume ID, etc.
   * Returns null to not display anything.
   */
  renderToolUseTag?(input: Partial<z.infer<Input>>): React.ReactNode
  /**
   * Optional. When omitted, no progress UI is shown while the tool runs.
   */
  renderToolUseProgressMessage?(
    progressMessagesForMessage: ProgressMessage<P>[],
    options: {
      tools: Tools
      verbose: boolean
      terminalSize?: { columns: number; rows: number }
      inProgressToolCallCount?: number
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  renderToolUseQueuedMessage?(): React.ReactNode
  /**
   * Optional. When omitted, falls back to <FallbackToolUseRejectedMessage />.
   * Only define this for tools that need custom rejection UI (e.g., file edits
   * that show the rejected diff).
   */
  renderToolUseRejectedMessage?(
    input: z.infer<Input>,
    options: {
      columns: number
      messages: Message[]
      style?: 'condensed'
      theme: ThemeName
      tools: Tools
      verbose: boolean
      progressMessagesForMessage: ProgressMessage<P>[]
      isTranscriptMode?: boolean
    },
  ): React.ReactNode
  /**
   * Optional. When omitted, falls back to <FallbackToolUseErrorMessage />.
   * Only define this for tools that need custom error UI (e.g., search tools
   * that show "File not found" instead of the raw error).
   */
  renderToolUseErrorMessage?(
    result: ToolResultBlockParam['content'],
    options: {
      progressMessagesForMessage: ProgressMessage<P>[]
      tools: Tools
      verbose: boolean
      isTranscriptMode?: boolean
    },
  ): React.ReactNode

  /**
   * Renders multiple parallel instances of this tool as a group (non-verbose
   * mode only). In verbose mode, individual tool uses render at their
   * original positions.
   * @returns React node to render, or null to fall back to individual rendering
   */
  renderGroupedToolUse?(
    toolUses: Array<{
      param: ToolUseBlockParam
      isResolved: boolean
      isError: boolean
      isInProgress: boolean
      progressMessages: ProgressMessage<P>[]
      result?: {
        param: ToolResultBlockParam
        output: unknown
      }
    }>,
    options: {
      shouldAnimate: boolean
      tools: Tools
    },
  ): React.ReactNode | null
}
+
/**
 * A collection of tools. Use this type instead of `Tool[]` to make it easier
 * to track where tool sets are assembled, passed, and filtered across the codebase.
 */
export type Tools = readonly Tool[]

/**
 * Methods that `buildTool` supplies a default for. A `ToolDef` may omit these;
 * the resulting `Tool` always has them.
 */
type DefaultableToolKeys =
  | 'isEnabled'
  | 'isConcurrencySafe'
  | 'isReadOnly'
  | 'isDestructive'
  | 'checkPermissions'
  | 'toAutoClassifierInput'
  | 'userFacingName'

/**
 * Tool definition accepted by `buildTool`. Same shape as `Tool` but with the
 * defaultable methods optional — `buildTool` fills them in so callers always
 * see a complete `Tool`.
 */
export type ToolDef<
  Input extends AnyObject = AnyObject,
  Output = unknown,
  P extends ToolProgressData = ToolProgressData,
> = Omit<Tool<Input, Output, P>, DefaultableToolKeys> &
  Partial<Pick<Tool<Input, Output, P>, DefaultableToolKeys>>

/**
 * Type-level spread mirroring `{ ...TOOL_DEFAULTS, ...def }`. For each
 * defaultable key: if D provides it (required), D's type wins; if D omits
 * it or has it optional (inherited from Partial<> in the constraint), the
 * default fills in. All other keys come from D verbatim — preserving arity,
 * optional presence, and literal types exactly as `satisfies Tool` did.
 * The `-?` modifier strips optionality: every defaultable key is required
 * on the built tool.
 */
type BuiltTool<D> = Omit<D, DefaultableToolKeys> & {
  [K in DefaultableToolKeys]-?: K extends keyof D
    ? undefined extends D[K]
      ? ToolDefaults[K]
      : D[K]
    : ToolDefaults[K]
}
+
/**
 * Build a complete `Tool` from a partial definition, filling in safe defaults
 * for the commonly-stubbed methods. All tool exports should go through this so
 * that defaults live in one place and callers never need `?.() ?? default`.
 *
 * Defaults (fail-closed where it matters):
 * - `isEnabled` → `true`
 * - `isConcurrencySafe` → `false` (assume not safe)
 * - `isReadOnly` → `false` (assume writes)
 * - `isDestructive` → `false`
 * - `checkPermissions` → `{ behavior: 'allow', updatedInput }` (defer to general permission system)
 * - `toAutoClassifierInput` → `''` (skip classifier — security-relevant tools must override)
 * - `userFacingName` → `name`
 */
const TOOL_DEFAULTS = {
  isEnabled: () => true,
  isConcurrencySafe: (_input?: unknown) => false,
  isReadOnly: (_input?: unknown) => false,
  isDestructive: (_input?: unknown) => false,
  checkPermissions: (
    input: { [key: string]: unknown },
    _ctx?: ToolUseContext,
  ): Promise<PermissionResult> =>
    Promise.resolve({ behavior: 'allow', updatedInput: input }),
  toAutoClassifierInput: (_input?: unknown) => '',
  // Placeholder only — buildTool always overlays `() => def.name`, so this
  // '' is never observed through a built tool.
  userFacingName: (_input?: unknown) => '',
}

// The defaults type is the ACTUAL shape of TOOL_DEFAULTS (optional params so
// both 0-arg and full-arg call sites type-check — stubs varied in arity and
// tests relied on that), not the interface's strict signatures.
type ToolDefaults = typeof TOOL_DEFAULTS
+
+// D infers the concrete object-literal type from the call site. The
+// constraint provides contextual typing for method parameters; `any` in
+// constraint position is structural and never leaks into the return type.
+// BuiltTool<D> mirrors runtime `{...TOOL_DEFAULTS, ...def}` at the type level.
+// eslint-disable-next-line @typescript-eslint/no-explicit-any
+type AnyToolDef = ToolDef<any, any, any>
+
+export function buildTool<D extends AnyToolDef>(def: D): BuiltTool<D> {
+  // The runtime spread is straightforward; the `as` bridges the gap between
+  // the structural-any constraint and the precise BuiltTool<D> return. The
+  // type semantics are proven by the 0-error typecheck across all 60+ tools.
+  return {
+    ...TOOL_DEFAULTS,
+    userFacingName: () => def.name,
+    ...def,
+  } as BuiltTool<D>
+}

+ 5 - 0
src/assistant/AssistantSessionChooser.tsx

@@ -0,0 +1,5 @@
// Stub: not included in leak
import React from 'react';
/**
 * Placeholder component — the real implementation was not part of the
 * extracted source. Accepts any props and always renders nothing.
 */
export function AssistantSessionChooser(_props: any): React.ReactElement | null {
  return null;
}

+ 87 - 0
src/assistant/sessionHistory.ts

@@ -0,0 +1,87 @@
+import axios from 'axios'
+import { getOauthConfig } from '../constants/oauth.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import { logForDebugging } from '../utils/debug.js'
+import { getOAuthHeaders, prepareApiRequest } from '../utils/teleport/api.js'
+
/** Default page size requested per session-events fetch. */
export const HISTORY_PAGE_SIZE = 100

/** One page of session history, already unwrapped from the wire response. */
export type HistoryPage = {
  /** Chronological order within the page. */
  events: SDKMessage[]
  /** Oldest event ID in this page → before_id cursor for next-older page. */
  firstId: string | null
  /** true = older events exist. */
  hasMore: boolean
}

// Raw wire shape returned by GET /v1/sessions/{id}/events.
type SessionEventsResponse = {
  data: SDKMessage[]
  has_more: boolean
  first_id: string | null
  last_id: string | null
}

/** Resolved base URL + auth headers, reusable across page fetches. */
export type HistoryAuthCtx = {
  baseUrl: string
  headers: Record<string, string>
}
+
+/** Prepare auth + headers + base URL once, reuse across pages. */
+export async function createHistoryAuthCtx(
+  sessionId: string,
+): Promise<HistoryAuthCtx> {
+  const { accessToken, orgUUID } = await prepareApiRequest()
+  return {
+    baseUrl: `${getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}/events`,
+    headers: {
+      ...getOAuthHeaders(accessToken),
+      'anthropic-beta': 'ccr-byoc-2025-07-29',
+      'x-organization-uuid': orgUUID,
+    },
+  }
+}
+
+async function fetchPage(
+  ctx: HistoryAuthCtx,
+  params: Record<string, string | number | boolean>,
+  label: string,
+): Promise<HistoryPage | null> {
+  const resp = await axios
+    .get<SessionEventsResponse>(ctx.baseUrl, {
+      headers: ctx.headers,
+      params,
+      timeout: 15000,
+      validateStatus: () => true,
+    })
+    .catch(() => null)
+  if (!resp || resp.status !== 200) {
+    logForDebugging(`[${label}] HTTP ${resp?.status ?? 'error'}`)
+    return null
+  }
+  return {
+    events: Array.isArray(resp.data.data) ? resp.data.data : [],
+    firstId: resp.data.first_id,
+    hasMore: resp.data.has_more,
+  }
+}
+
+/**
+ * Newest page: last `limit` events, chronological, via anchor_to_latest.
+ * has_more=true means older events exist.
+ */
+export async function fetchLatestEvents(
+  ctx: HistoryAuthCtx,
+  limit = HISTORY_PAGE_SIZE,
+): Promise<HistoryPage | null> {
+  return fetchPage(ctx, { limit, anchor_to_latest: true }, 'fetchLatestEvents')
+}
+
+/** Older page: events immediately before `beforeId` cursor. */
+export async function fetchOlderEvents(
+  ctx: HistoryAuthCtx,
+  beforeId: string,
+  limit = HISTORY_PAGE_SIZE,
+): Promise<HistoryPage | null> {
+  return fetchPage(ctx, { limit, before_id: beforeId }, 'fetchOlderEvents')
+}

+ 1758 - 0
src/bootstrap/state.ts

@@ -0,0 +1,1758 @@
+import type { BetaMessageStreamParams } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
+import type { Attributes, Meter, MetricOptions } from '@opentelemetry/api'
+import type { logs } from '@opentelemetry/api-logs'
+import type { LoggerProvider } from '@opentelemetry/sdk-logs'
+import type { MeterProvider } from '@opentelemetry/sdk-metrics'
+import type { BasicTracerProvider } from '@opentelemetry/sdk-trace-base'
+import { realpathSync } from 'fs'
+import sumBy from 'lodash-es/sumBy.js'
+import { cwd } from 'process'
+import type { HookEvent, ModelUsage } from 'src/entrypoints/agentSdkTypes.js'
+import type { AgentColorName } from 'src/tools/AgentTool/agentColorManager.js'
+import type { HookCallbackMatcher } from 'src/types/hooks.js'
+// Indirection for browser-sdk build (package.json "browser" field swaps
+// crypto.ts for crypto.browser.ts). Pure leaf re-export of node:crypto —
+// zero circular-dep risk. Path-alias import bypasses bootstrap-isolation
+// (rule only checks ./ and / prefixes); explicit disable documents intent.
+// eslint-disable-next-line custom-rules/bootstrap-isolation
+import { randomUUID } from 'src/utils/crypto.js'
+import type { ModelSetting } from 'src/utils/model/model.js'
+import type { ModelStrings } from 'src/utils/model/modelStrings.js'
+import type { SettingSource } from 'src/utils/settings/constants.js'
+import { resetSettingsCache } from 'src/utils/settings/settingsCache.js'
+import type { PluginHookMatcher } from 'src/utils/settings/types.js'
+import { createSignal } from 'src/utils/signal.js'
+
+// Union type for registered hooks — either an SDK callback matcher or a
+// plugin's native hook matcher. Both register under the same HookEvent
+// keys in State.registeredHooks.
+type RegisteredHookMatcher = HookCallbackMatcher | PluginHookMatcher
+
+import type { SessionId } from 'src/types/ids.js'
+
+// DO NOT ADD MORE STATE HERE - BE JUDICIOUS WITH GLOBAL STATE
+
+// dev: true on entries that came via --dangerously-load-development-channels.
+// The allowlist gate checks this per-entry (not the session-wide
+// hasDevChannels bit) so passing both flags doesn't let the dev dialog's
+// acceptance leak allowlist-bypass to the --channels entries.
+export type ChannelEntry =
+  // Plugin-provided channel (marketplace verification + allowlist apply).
+  | { kind: 'plugin'; name: string; marketplace: string; dev?: boolean }
+  // Bare server channel (allowlist schema is plugin-only; see allowedChannels).
+  | { kind: 'server'; name: string; dev?: boolean }
+
+/** Minimal counter surface: add a delta, optionally tagged with extra
+ *  attributes (shape matches the OTel Attributes type imported above). */
+export type AttributedCounter = {
+  add(value: number, additionalAttributes?: Attributes): void
+}
+
+// Session-wide mutable state. The accessor functions below are the only
+// sanctioned way to read/write it; resetStateForTests() re-derives it from
+// getInitialState() between tests.
+type State = {
+  // cwd at process start, symlink-resolved and NFC-normalized.
+  originalCwd: string
+  // Stable project root - set once at startup (including by --worktree flag),
+  // never updated by mid-session EnterWorktreeTool.
+  // Use for project identity (history, skills, sessions) not file operations.
+  projectRoot: string
+  // Session-cumulative totals: updated by addToTotalCostState /
+  // addToTotalDurationState / addToToolDuration, cleared by resetCostState.
+  totalCostUSD: number
+  totalAPIDuration: number
+  totalAPIDurationWithoutRetries: number
+  totalToolDuration: number
+  // Per-turn accumulators; cleared by the resetTurn*Duration() helpers.
+  turnHookDurationMs: number
+  turnToolDurationMs: number
+  turnClassifierDurationMs: number
+  turnToolCount: number
+  turnHookCount: number
+  turnClassifierCount: number
+  // Date.now() at session start; getTotalDuration() measures from here.
+  startTime: number
+  // Updated via updateLastInteractionTime()/flushInteractionTime().
+  lastInteractionTime: number
+  // Cumulative diff line counts from addToTotalLinesChanged().
+  totalLinesAdded: number
+  totalLinesRemoved: number
+  // Latched by setHasUnknownModelCost(); never cleared mid-session.
+  hasUnknownModelCost: boolean
+  // Current working directory (NFC-normalized); see setCwdState().
+  cwd: string
+  // Per-model usage keyed by model name; written by addToTotalCostState().
+  modelUsage: { [modelName: string]: ModelUsage }
+  // Override from the --model CLI flag or a mid-session model change.
+  mainLoopModelOverride: ModelSetting | undefined
+  // Model selected at startup; set via setInitialMainLoopModel().
+  initialMainLoopModel: ModelSetting
+  modelStrings: ModelStrings | null
+  isInteractive: boolean
+  kairosActive: boolean
+  // When true, ensureToolResultPairing throws on mismatch instead of
+  // repairing with synthetic placeholders. HFI opts in at startup so
+  // trajectories fail fast rather than conditioning the model on fake
+  // tool_results.
+  strictToolResultPairing: boolean
+  sdkAgentProgressSummariesEnabled: boolean
+  userMsgOptIn: boolean
+  clientType: string
+  sessionSource: string | undefined
+  questionPreviewFormat: 'markdown' | 'html' | undefined
+  flagSettingsPath: string | undefined
+  flagSettingsInline: Record<string, unknown> | null
+  allowedSettingSources: SettingSource[]
+  sessionIngressToken: string | null | undefined
+  oauthTokenFromFd: string | null | undefined
+  apiKeyFromFd: string | null | undefined
+  // Telemetry state
+  meter: Meter | null
+  sessionCounter: AttributedCounter | null
+  locCounter: AttributedCounter | null
+  prCounter: AttributedCounter | null
+  commitCounter: AttributedCounter | null
+  costCounter: AttributedCounter | null
+  tokenCounter: AttributedCounter | null
+  codeEditToolDecisionCounter: AttributedCounter | null
+  activeTimeCounter: AttributedCounter | null
+  statsStore: { observe(name: string, value: number): void } | null
+  // Current session id; changed only by regenerateSessionId()/switchSession().
+  sessionId: SessionId
+  // Parent session ID for tracking session lineage (e.g., plan mode -> implementation)
+  parentSessionId: SessionId | undefined
+  // Logger state
+  loggerProvider: LoggerProvider | null
+  eventLogger: ReturnType<typeof logs.getLogger> | null
+  // Meter provider state
+  meterProvider: MeterProvider | null
+  // Tracer provider state
+  tracerProvider: BasicTracerProvider | null
+  // Agent color state
+  agentColorMap: Map<string, AgentColorName>
+  agentColorIndex: number
+  // Last API request for bug reports
+  lastAPIRequest: Omit<BetaMessageStreamParams, 'messages'> | null
+  // Messages from the last API request (ant-only; reference, not clone).
+  // Captures the exact post-compaction, CLAUDE.md-injected message set sent
+  // to the API so /share's serialized_conversation.json reflects reality.
+  lastAPIRequestMessages: BetaMessageStreamParams['messages'] | null
+  // Last auto-mode classifier request(s) for /share transcript
+  lastClassifierRequests: unknown[] | null
+  // CLAUDE.md content cached by context.ts for the auto-mode classifier.
+  // Breaks the yoloClassifier → claudemd → filesystem → permissions cycle.
+  cachedClaudeMdContent: string | null
+  // In-memory error log for recent errors
+  inMemoryErrorLog: Array<{ error: string; timestamp: string }>
+  // Session-only plugins from --plugin-dir flag
+  inlinePlugins: Array<string>
+  // Explicit --chrome / --no-chrome flag value (undefined = not set on CLI)
+  chromeFlagOverride: boolean | undefined
+  // Use cowork_plugins directory instead of plugins (--cowork flag or env var)
+  useCoworkPlugins: boolean
+  // Session-only bypass permissions mode flag (not persisted)
+  sessionBypassPermissionsMode: boolean
+  // Session-only flag gating the .claude/scheduled_tasks.json watcher
+  // (useScheduledTasks). Set by cronScheduler.start() when the JSON has
+  // entries, or by CronCreateTool. Not persisted.
+  scheduledTasksEnabled: boolean
+  // Session-only cron tasks created via CronCreate with durable: false.
+  // Fire on schedule like file-backed tasks but are never written to
+  // .claude/scheduled_tasks.json — they die with the process. Typed via
+  // SessionCronTask below (not importing from cronTasks.ts keeps
+  // bootstrap a leaf of the import DAG).
+  sessionCronTasks: SessionCronTask[]
+  // Teams created this session via TeamCreate. cleanupSessionTeams()
+  // removes these on gracefulShutdown so subagent-created teams don't
+  // persist on disk forever (gh-32730). TeamDelete removes entries to
+  // avoid double-cleanup. Lives here (not teamHelpers.ts) so
+  // resetStateForTests() clears it between tests.
+  sessionCreatedTeams: Set<string>
+  // Session-only trust flag for home directory (not persisted to disk)
+  // When running from home dir, trust dialog is shown but not saved to disk.
+  // This flag allows features requiring trust to work during the session.
+  sessionTrustAccepted: boolean
+  // Session-only flag to disable session persistence to disk
+  sessionPersistenceDisabled: boolean
+  // Track if user has exited plan mode in this session (for re-entry guidance)
+  hasExitedPlanMode: boolean
+  // Track if we need to show the plan mode exit attachment (one-time notification)
+  needsPlanModeExitAttachment: boolean
+  // Track if we need to show the auto mode exit attachment (one-time notification)
+  needsAutoModeExitAttachment: boolean
+  // Track if LSP plugin recommendation has been shown this session (only show once)
+  lspRecommendationShownThisSession: boolean
+  // SDK init event state - jsonSchema for structured output
+  initJsonSchema: Record<string, unknown> | null
+  // Registered hooks - SDK callbacks and plugin native hooks
+  registeredHooks: Partial<Record<HookEvent, RegisteredHookMatcher[]>> | null
+  // Cache for plan slugs: sessionId -> wordSlug
+  planSlugCache: Map<string, string>
+  // Track teleported session for reliability logging
+  teleportedSessionInfo: {
+    isTeleported: boolean
+    hasLoggedFirstMessage: boolean
+    sessionId: string | null
+  } | null
+  // Track invoked skills for preservation across compaction
+  // Keys are composite: `${agentId ?? ''}:${skillName}` to prevent cross-agent overwrites
+  invokedSkills: Map<
+    string,
+    {
+      skillName: string
+      skillPath: string
+      content: string
+      invokedAt: number
+      agentId: string | null
+    }
+  >
+  // Track slow operations for dev bar display (ant-only)
+  slowOperations: Array<{
+    operation: string
+    durationMs: number
+    timestamp: number
+  }>
+  // SDK-provided betas (e.g., context-1m-2025-08-07)
+  sdkBetas: string[] | undefined
+  // Main thread agent type (from --agent flag or settings)
+  mainThreadAgentType: string | undefined
+  // Remote mode (--remote flag)
+  isRemoteMode: boolean
+  // Direct connect server URL (for display in header)
+  directConnectServerUrl: string | undefined
+  // System prompt section cache state
+  systemPromptSectionCache: Map<string, string | null>
+  // Last date emitted to the model (for detecting midnight date changes)
+  lastEmittedDate: string | null
+  // Additional directories from --add-dir flag (for CLAUDE.md loading)
+  additionalDirectoriesForClaudeMd: string[]
+  // Channel server allowlist from --channels flag (servers whose channel
+  // notifications should register this session). Parsed once in main.tsx —
+  // the tag decides trust model: 'plugin' → marketplace verification +
+  // allowlist, 'server' → allowlist always fails (schema is plugin-only).
+  // Either kind needs entry.dev to bypass allowlist.
+  allowedChannels: ChannelEntry[]
+  // True if any entry in allowedChannels came from
+  // --dangerously-load-development-channels (so ChannelsNotice can name the
+  // right flag in policy-blocked messages)
+  hasDevChannels: boolean
+  // Dir containing the session's `.jsonl`; null = derive from originalCwd.
+  sessionProjectDir: string | null
+  // Cached prompt cache 1h TTL allowlist from GrowthBook (session-stable)
+  promptCache1hAllowlist: string[] | null
+  // Cached 1h TTL user eligibility (session-stable). Latched on first
+  // evaluation so mid-session overage flips don't change the cache_control
+  // TTL, which would bust the server-side prompt cache.
+  promptCache1hEligible: boolean | null
+  // Sticky-on latch for AFK_MODE_BETA_HEADER. Once auto mode is first
+  // activated, keep sending the header for the rest of the session so
+  // Shift+Tab toggles don't bust the ~50-70K token prompt cache.
+  afkModeHeaderLatched: boolean | null
+  // Sticky-on latch for FAST_MODE_BETA_HEADER. Once fast mode is first
+  // enabled, keep sending the header so cooldown enter/exit doesn't
+  // double-bust the prompt cache. The `speed` body param stays dynamic.
+  fastModeHeaderLatched: boolean | null
+  // Sticky-on latch for the cache-editing beta header. Once cached
+  // microcompact is first enabled, keep sending the header so mid-session
+  // GrowthBook/settings toggles don't bust the prompt cache.
+  cacheEditingHeaderLatched: boolean | null
+  // Sticky-on latch for clearing thinking from prior tool loops. Triggered
+  // when >1h since last API call (confirmed cache miss — no cache-hit
+  // benefit to keeping thinking). Once latched, stays on so the newly-warmed
+  // thinking-cleared cache isn't busted by flipping back to keep:'all'.
+  thinkingClearLatched: boolean | null
+  // Current prompt ID (UUID) correlating a user prompt with subsequent OTel events
+  promptId: string | null
+  // Last API requestId for the main conversation chain (not subagents).
+  // Updated after each successful API response for main-session queries.
+  // Read at shutdown to send cache eviction hints to inference.
+  lastMainRequestId: string | undefined
+  // Timestamp (Date.now()) of the last successful API call completion.
+  // Used to compute timeSinceLastApiCallMs in tengu_api_success for
+  // correlating cache misses with idle time (cache TTL is ~5min).
+  lastApiCompletionTimestamp: number | null
+  // Set to true after compaction (auto or manual /compact). Consumed by
+  // logAPISuccess to tag the first post-compaction API call so we can
+  // distinguish compaction-induced cache misses from TTL expiry.
+  pendingPostCompaction: boolean
+}
+
+// ALSO HERE - THINK THRICE BEFORE MODIFYING
+/**
+ * Build a fresh State for a new session. Must stay runnable in both Node
+ * and the browser-SDK bundle, so every `process` access is guarded.
+ */
+function getInitialState(): State {
+  // Resolve symlinks in cwd to match behavior of shell.ts setCwd
+  // This ensures consistency with how paths are sanitized for session storage
+  let resolvedCwd = ''
+  if (
+    typeof process !== 'undefined' &&
+    typeof process.cwd === 'function' &&
+    typeof realpathSync === 'function'
+  ) {
+    const rawCwd = cwd()
+    try {
+      resolvedCwd = realpathSync(rawCwd).normalize('NFC')
+    } catch {
+      // File Provider EPERM on CloudStorage mounts (lstat per path component).
+      resolvedCwd = rawCwd.normalize('NFC')
+    }
+  }
+  const state: State = {
+    originalCwd: resolvedCwd,
+    projectRoot: resolvedCwd,
+    totalCostUSD: 0,
+    totalAPIDuration: 0,
+    totalAPIDurationWithoutRetries: 0,
+    totalToolDuration: 0,
+    turnHookDurationMs: 0,
+    turnToolDurationMs: 0,
+    turnClassifierDurationMs: 0,
+    turnToolCount: 0,
+    turnHookCount: 0,
+    turnClassifierCount: 0,
+    startTime: Date.now(),
+    lastInteractionTime: Date.now(),
+    totalLinesAdded: 0,
+    totalLinesRemoved: 0,
+    hasUnknownModelCost: false,
+    cwd: resolvedCwd,
+    modelUsage: {},
+    mainLoopModelOverride: undefined,
+    // Set via setInitialMainLoopModel() (null until then).
+    initialMainLoopModel: null,
+    modelStrings: null,
+    isInteractive: false,
+    kairosActive: false,
+    strictToolResultPairing: false,
+    sdkAgentProgressSummariesEnabled: false,
+    userMsgOptIn: false,
+    clientType: 'cli',
+    sessionSource: undefined,
+    questionPreviewFormat: undefined,
+    sessionIngressToken: undefined,
+    oauthTokenFromFd: undefined,
+    apiKeyFromFd: undefined,
+    flagSettingsPath: undefined,
+    flagSettingsInline: null,
+    allowedSettingSources: [
+      'userSettings',
+      'projectSettings',
+      'localSettings',
+      'flagSettings',
+      'policySettings',
+    ],
+    // Telemetry state
+    meter: null,
+    sessionCounter: null,
+    locCounter: null,
+    prCounter: null,
+    commitCounter: null,
+    costCounter: null,
+    tokenCounter: null,
+    codeEditToolDecisionCounter: null,
+    activeTimeCounter: null,
+    statsStore: null,
+    sessionId: randomUUID() as SessionId,
+    parentSessionId: undefined,
+    // Logger state
+    loggerProvider: null,
+    eventLogger: null,
+    // Meter provider state
+    meterProvider: null,
+    tracerProvider: null,
+    // Agent color state
+    agentColorMap: new Map(),
+    agentColorIndex: 0,
+    // Last API request for bug reports
+    lastAPIRequest: null,
+    lastAPIRequestMessages: null,
+    // Last auto-mode classifier request(s) for /share transcript
+    lastClassifierRequests: null,
+    cachedClaudeMdContent: null,
+    // In-memory error log for recent errors
+    inMemoryErrorLog: [],
+    // Session-only plugins from --plugin-dir flag
+    inlinePlugins: [],
+    // Explicit --chrome / --no-chrome flag value (undefined = not set on CLI)
+    chromeFlagOverride: undefined,
+    // Use cowork_plugins directory instead of plugins
+    useCoworkPlugins: false,
+    // Session-only bypass permissions mode flag (not persisted)
+    sessionBypassPermissionsMode: false,
+    // Scheduled tasks disabled until flag or dialog enables them
+    scheduledTasksEnabled: false,
+    sessionCronTasks: [],
+    sessionCreatedTeams: new Set(),
+    // Session-only trust flag (not persisted to disk)
+    sessionTrustAccepted: false,
+    // Session-only flag to disable session persistence to disk
+    sessionPersistenceDisabled: false,
+    // Track if user has exited plan mode in this session
+    hasExitedPlanMode: false,
+    // Track if we need to show the plan mode exit attachment
+    needsPlanModeExitAttachment: false,
+    // Track if we need to show the auto mode exit attachment
+    needsAutoModeExitAttachment: false,
+    // Track if LSP plugin recommendation has been shown this session
+    lspRecommendationShownThisSession: false,
+    // SDK init event state
+    initJsonSchema: null,
+    registeredHooks: null,
+    // Cache for plan slugs
+    planSlugCache: new Map(),
+    // Track teleported session for reliability logging
+    teleportedSessionInfo: null,
+    // Track invoked skills for preservation across compaction
+    invokedSkills: new Map(),
+    // Track slow operations for dev bar display
+    slowOperations: [],
+    // SDK-provided betas
+    sdkBetas: undefined,
+    // Main thread agent type
+    mainThreadAgentType: undefined,
+    // Remote mode
+    isRemoteMode: false,
+    // Guard `process` like the cwd resolution above — this module is also
+    // bundled for the browser SDK (see the crypto import note at the top),
+    // where `process` may be undefined and a bare env read would throw.
+    ...(typeof process !== 'undefined' && process.env.USER_TYPE === 'ant'
+      ? {
+          replBridgeActive: false,
+        }
+      : {}),
+    // Direct connect server URL
+    directConnectServerUrl: undefined,
+    // System prompt section cache state
+    systemPromptSectionCache: new Map(),
+    // Last date emitted to the model
+    lastEmittedDate: null,
+    // Additional directories from --add-dir flag (for CLAUDE.md loading)
+    additionalDirectoriesForClaudeMd: [],
+    // Channel server allowlist from --channels flag
+    allowedChannels: [],
+    hasDevChannels: false,
+    // Session project dir (null = derive from originalCwd)
+    sessionProjectDir: null,
+    // Prompt cache 1h allowlist (null = not yet fetched from GrowthBook)
+    promptCache1hAllowlist: null,
+    // Prompt cache 1h eligibility (null = not yet evaluated)
+    promptCache1hEligible: null,
+    // Beta header latches (null = not yet triggered)
+    afkModeHeaderLatched: null,
+    fastModeHeaderLatched: null,
+    cacheEditingHeaderLatched: null,
+    thinkingClearLatched: null,
+    // Current prompt ID
+    promptId: null,
+    lastMainRequestId: undefined,
+    lastApiCompletionTimestamp: null,
+    pendingPostCompaction: false,
+  }
+
+  return state
+}
+
+// AND ESPECIALLY HERE
+// The single module-level state instance; every accessor below reads/writes it.
+const STATE: State = getInitialState()
+
+/** Current session id (changed by regenerateSessionId/switchSession). */
+export function getSessionId(): SessionId {
+  return STATE.sessionId
+}
+
+export function regenerateSessionId(
+  options: { setCurrentAsParent?: boolean } = {},
+): SessionId {
+  if (options.setCurrentAsParent) {
+    STATE.parentSessionId = STATE.sessionId
+  }
+  // Drop the outgoing session's plan-slug entry so the Map doesn't
+  // accumulate stale keys. Callers that need to carry the slug across
+  // (REPL.tsx clearContext) read it before calling clearConversation.
+  STATE.planSlugCache.delete(STATE.sessionId)
+  // Regenerated sessions live in the current project: reset projectDir to
+  // null so getTranscriptPath() derives from originalCwd.
+  STATE.sessionId = randomUUID() as SessionId
+  STATE.sessionProjectDir = null
+  return STATE.sessionId
+}
+
+export function getParentSessionId(): SessionId | undefined {
+  return STATE.parentSessionId
+}
+
+/**
+ * Atomically switch the active session. `sessionId` and `sessionProjectDir`
+ * always change together — there is no separate setter for either, so they
+ * cannot drift out of sync (CC-34).
+ *
+ * @param projectDir — directory containing `<sessionId>.jsonl`. Omit (or
+ *   pass `null`) for sessions in the current project — the path will derive
+ *   from originalCwd at read time. Pass `dirname(transcriptPath)` when the
+ *   session lives in a different project directory (git worktrees,
+ *   cross-project resume). Every call resets the project dir; it never
+ *   carries over from the previous session.
+ */
+export function switchSession(
+  sessionId: SessionId,
+  projectDir: string | null = null,
+): void {
+  // Drop the outgoing session's plan-slug entry so the Map stays bounded
+  // across repeated /resume. Only the current session's slug is ever read
+  // (plans.ts getPlanSlug defaults to getSessionId()).
+  STATE.planSlugCache.delete(STATE.sessionId)
+  STATE.sessionId = sessionId
+  STATE.sessionProjectDir = projectDir
+  sessionSwitched.emit(sessionId)
+}
+
+// Module-scope signal fired by switchSession above; exposed read-only via
+// onSessionSwitch below. (Declared after its use site — safe, the emit only
+// runs at call time.)
+const sessionSwitched = createSignal<[id: SessionId]>()
+
+/**
+ * Register a callback that fires when switchSession changes the active
+ * sessionId. bootstrap can't import listeners directly (DAG leaf), so
+ * callers register themselves. concurrentSessions.ts uses this to keep the
+ * PID file's sessionId in sync with --resume.
+ */
+export const onSessionSwitch = sessionSwitched.subscribe
+
+/**
+ * Project directory the current session's transcript lives in, or `null` if
+ * the session was created in the current project (common case — derive from
+ * originalCwd). See `switchSession()`.
+ */
+export function getSessionProjectDir(): string | null {
+  const { sessionProjectDir } = STATE
+  return sessionProjectDir
+}
+
+/** Working directory captured at process startup (symlink-resolved). */
+export function getOriginalCwd(): string {
+  const { originalCwd } = STATE
+  return originalCwd
+}
+
+/**
+ * Get the stable project root directory.
+ * Unlike getOriginalCwd(), this is never updated by mid-session EnterWorktreeTool
+ * (so skills/history stay stable when entering a throwaway worktree).
+ * It IS set at startup by --worktree, since that worktree is the session's project.
+ * Use for project identity (history, skills, sessions) not file operations.
+ */
+export function getProjectRoot(): string {
+  const { projectRoot } = STATE
+  return projectRoot
+}
+
+export function setOriginalCwd(cwd: string): void {
+  STATE.originalCwd = cwd.normalize('NFC')
+}
+
+/**
+ * Only for --worktree startup flag. Mid-session EnterWorktreeTool must NOT
+ * call this — skills/history should stay anchored to where the session started.
+ */
+export function setProjectRoot(cwd: string): void {
+  STATE.projectRoot = cwd.normalize('NFC')
+}
+
+export function getCwdState(): string {
+  return STATE.cwd
+}
+
+export function setCwdState(cwd: string): void {
+  STATE.cwd = cwd.normalize('NFC')
+}
+
+export function getDirectConnectServerUrl(): string | undefined {
+  return STATE.directConnectServerUrl
+}
+
+export function setDirectConnectServerUrl(url: string): void {
+  STATE.directConnectServerUrl = url
+}
+
+export function addToTotalDurationState(
+  duration: number,
+  durationWithoutRetries: number,
+): void {
+  STATE.totalAPIDuration += duration
+  STATE.totalAPIDurationWithoutRetries += durationWithoutRetries
+}
+
+export function resetTotalDurationStateAndCost_FOR_TESTS_ONLY(): void {
+  STATE.totalAPIDuration = 0
+  STATE.totalAPIDurationWithoutRetries = 0
+  STATE.totalCostUSD = 0
+}
+
+export function addToTotalCostState(
+  cost: number,
+  modelUsage: ModelUsage,
+  model: string,
+): void {
+  STATE.modelUsage[model] = modelUsage
+  STATE.totalCostUSD += cost
+}
+
+/** Session-cumulative API cost in USD. */
+export function getTotalCostUSD(): number {
+  const { totalCostUSD } = STATE
+  return totalCostUSD
+}
+
+/** Session-cumulative API latency, including retry time. */
+export function getTotalAPIDuration(): number {
+  const { totalAPIDuration } = STATE
+  return totalAPIDuration
+}
+
+/** Wall-clock time elapsed since session start. */
+export function getTotalDuration(): number {
+  const { startTime } = STATE
+  return Date.now() - startTime
+}
+
+/** Session-cumulative API latency, excluding retries. */
+export function getTotalAPIDurationWithoutRetries(): number {
+  const { totalAPIDurationWithoutRetries } = STATE
+  return totalAPIDurationWithoutRetries
+}
+
+/** Session-cumulative tool execution time. */
+export function getTotalToolDuration(): number {
+  const { totalToolDuration } = STATE
+  return totalToolDuration
+}
+
+/** Record one tool run: bumps session total, turn total, and turn count. */
+export function addToToolDuration(duration: number): void {
+  STATE.totalToolDuration += duration
+  STATE.turnToolDurationMs += duration
+  STATE.turnToolCount += 1
+}
+
+/** Hook time accumulated in the current turn (ms). */
+export function getTurnHookDurationMs(): number {
+  const { turnHookDurationMs } = STATE
+  return turnHookDurationMs
+}
+
+/** Record one hook run in the current turn. */
+export function addToTurnHookDuration(duration: number): void {
+  STATE.turnHookDurationMs += duration
+  STATE.turnHookCount += 1
+}
+
+/** Clear per-turn hook accumulators (start of a new turn). */
+export function resetTurnHookDuration(): void {
+  Object.assign(STATE, { turnHookDurationMs: 0, turnHookCount: 0 })
+}
+
+/** Hooks executed so far in the current turn. */
+export function getTurnHookCount(): number {
+  const { turnHookCount } = STATE
+  return turnHookCount
+}
+
+/** Tool time accumulated in the current turn (ms). */
+export function getTurnToolDurationMs(): number {
+  const { turnToolDurationMs } = STATE
+  return turnToolDurationMs
+}
+
+/** Clear per-turn tool accumulators (start of a new turn). */
+export function resetTurnToolDuration(): void {
+  Object.assign(STATE, { turnToolDurationMs: 0, turnToolCount: 0 })
+}
+
+/** Tools executed so far in the current turn. */
+export function getTurnToolCount(): number {
+  const { turnToolCount } = STATE
+  return turnToolCount
+}
+
+/** Classifier time accumulated in the current turn (ms). */
+export function getTurnClassifierDurationMs(): number {
+  const { turnClassifierDurationMs } = STATE
+  return turnClassifierDurationMs
+}
+
+/** Record one classifier call in the current turn. */
+export function addToTurnClassifierDuration(duration: number): void {
+  STATE.turnClassifierDurationMs += duration
+  STATE.turnClassifierCount += 1
+}
+
+/** Clear per-turn classifier accumulators (start of a new turn). */
+export function resetTurnClassifierDuration(): void {
+  Object.assign(STATE, { turnClassifierDurationMs: 0, turnClassifierCount: 0 })
+}
+
+/** Classifier calls made so far in the current turn. */
+export function getTurnClassifierCount(): number {
+  const { turnClassifierCount } = STATE
+  return turnClassifierCount
+}
+
+// Structural type of the stats sink; kept local to these two accessors.
+type StatsStore = { observe(name: string, value: number): void }
+
+/** Current stats sink, or null when none is installed. */
+export function getStatsStore(): StatsStore | null {
+  return STATE.statsStore
+}
+
+/** Install (or clear, with null) the stats sink. */
+export function setStatsStore(store: StatsStore | null): void {
+  STATE.statsStore = store
+}
+
+// Dirty bit set by updateLastInteractionTime(); consumed on the next flush.
+let interactionTimeDirty = false
+
+// Write the timestamp now and clear the dirty bit.
+function commitInteractionTime(): void {
+  STATE.lastInteractionTime = Date.now()
+  interactionTimeDirty = false
+}
+
+/**
+ * Marks that an interaction occurred.
+ *
+ * By default the actual Date.now() call is deferred until the next Ink render
+ * frame (via flushInteractionTime()) so we avoid calling Date.now() on every
+ * single keypress.
+ *
+ * Pass `immediate = true` when calling from React useEffect callbacks or
+ * other code that runs *after* the Ink render cycle has already flushed.
+ * Without it the timestamp stays stale until the next render, which may never
+ * come if the user is idle (e.g. permission dialog waiting for input).
+ */
+export function updateLastInteractionTime(immediate?: boolean): void {
+  if (immediate) {
+    commitInteractionTime()
+    return
+  }
+  interactionTimeDirty = true
+}
+
+/**
+ * If an interaction was recorded since the last flush, update the timestamp
+ * now. Called by Ink before each render cycle so we batch many keypresses into
+ * a single Date.now() call.
+ */
+export function flushInteractionTime(): void {
+  if (!interactionTimeDirty) return
+  commitInteractionTime()
+}
+
+/** Accumulate diff stats for the session (lines added / removed). */
+export function addToTotalLinesChanged(added: number, removed: number): void {
+  STATE.totalLinesRemoved += removed
+  STATE.totalLinesAdded += added
+}
+
+/** Session-cumulative count of added lines. */
+export function getTotalLinesAdded(): number {
+  const { totalLinesAdded } = STATE
+  return totalLinesAdded
+}
+
+/** Session-cumulative count of removed lines. */
+export function getTotalLinesRemoved(): number {
+  const { totalLinesRemoved } = STATE
+  return totalLinesRemoved
+}
+
+// Sum one numeric ModelUsage field across every model used this session.
+function sumUsageField(field: string): number {
+  return sumBy(Object.values(STATE.modelUsage), field)
+}
+
+/** Total input tokens across all models. */
+export function getTotalInputTokens(): number {
+  return sumUsageField('inputTokens')
+}
+
+/** Total output tokens across all models. */
+export function getTotalOutputTokens(): number {
+  return sumUsageField('outputTokens')
+}
+
+/** Total cache-read input tokens across all models. */
+export function getTotalCacheReadInputTokens(): number {
+  return sumUsageField('cacheReadInputTokens')
+}
+
+/** Total cache-creation input tokens across all models. */
+export function getTotalCacheCreationInputTokens(): number {
+  return sumUsageField('cacheCreationInputTokens')
+}
+
+/** Total web-search requests across all models. */
+export function getTotalWebSearchRequests(): number {
+  return sumUsageField('webSearchRequests')
+}
+
+// Output-token snapshot taken at turn start; getTurnOutputTokens() measures
+// from here. NOTE: also reset directly by resetStateForTests() below, so
+// these module-scope names must not be renamed.
+let outputTokensAtTurnStart = 0
+// Token budget for the current turn, or null when no budget applies.
+let currentTurnTokenBudget: number | null = null
+// Output tokens produced since the last snapshotOutputTokensForTurn() call.
+export function getTurnOutputTokens(): number {
+  return getTotalOutputTokens() - outputTokensAtTurnStart
+}
+export function getCurrentTurnTokenBudget(): number | null {
+  return currentTurnTokenBudget
+}
+// Times the turn has continued past its budget; bumped by the incrementer.
+let budgetContinuationCount = 0
+// Start a new turn: snapshot current output tokens, record the budget,
+// and reset the continuation counter.
+export function snapshotOutputTokensForTurn(budget: number | null): void {
+  outputTokensAtTurnStart = getTotalOutputTokens()
+  currentTurnTokenBudget = budget
+  budgetContinuationCount = 0
+}
+export function getBudgetContinuationCount(): number {
+  return budgetContinuationCount
+}
+export function incrementBudgetContinuationCount(): void {
+  budgetContinuationCount++
+}
+
+/** Latch: some response used a model whose cost we couldn't determine. */
+export function setHasUnknownModelCost(): void {
+  STATE.hasUnknownModelCost = true
+}
+
+/** True once any unknown-cost model was seen this session. */
+export function hasUnknownModelCost(): boolean {
+  const { hasUnknownModelCost } = STATE
+  return hasUnknownModelCost
+}
+
+/** Last main-conversation API requestId (see State.lastMainRequestId). */
+export function getLastMainRequestId(): string | undefined {
+  const { lastMainRequestId } = STATE
+  return lastMainRequestId
+}
+
+/** Record the requestId of the latest main-session API response. */
+export function setLastMainRequestId(requestId: string): void {
+  STATE.lastMainRequestId = requestId
+}
+
+/** Date.now() of the last successful API completion, or null. */
+export function getLastApiCompletionTimestamp(): number | null {
+  const { lastApiCompletionTimestamp } = STATE
+  return lastApiCompletionTimestamp
+}
+
+/** Record when the latest API call completed successfully. */
+export function setLastApiCompletionTimestamp(timestamp: number): void {
+  STATE.lastApiCompletionTimestamp = timestamp
+}
+
+/** Mark that a compaction just occurred. The next API success event will
+ *  include isPostCompaction=true, then the flag auto-resets. */
+export function markPostCompaction(): void {
+  STATE.pendingPostCompaction = true
+}
+
+/** Consume the post-compaction flag. Returns true once after compaction,
+ *  then returns false until the next compaction. */
+export function consumePostCompaction(): boolean {
+  if (!STATE.pendingPostCompaction) return false
+  STATE.pendingPostCompaction = false
+  return true
+}
+
+/** Timestamp of the most recent (flushed) user interaction. */
+export function getLastInteractionTime(): number {
+  const { lastInteractionTime } = STATE
+  return lastInteractionTime
+}
+
+// Scroll drain suspension — background intervals check this before doing work
+// so they don't compete with scroll frames for the event loop. Set by
+// ScrollBox scrollBy/scrollTo, cleared SCROLL_DRAIN_IDLE_MS after the last
+// scroll event. Module-scope (not in STATE) — ephemeral hot-path flag, no
+// test-reset needed since the debounce timer self-clears.
+let scrollDraining = false
+let scrollDrainTimer: ReturnType<typeof setTimeout> | undefined
+const SCROLL_DRAIN_IDLE_MS = 150
+
+/** Mark that a scroll event just happened. Background intervals gate on
+ *  getIsScrollDraining() and skip their work until the debounce clears. */
+export function markScrollActivity(): void {
+  scrollDraining = true
+  if (scrollDrainTimer) clearTimeout(scrollDrainTimer)
+  scrollDrainTimer = setTimeout(() => {
+    scrollDraining = false
+    scrollDrainTimer = undefined
+  }, SCROLL_DRAIN_IDLE_MS)
+  // Optional call: unref exists on Node's Timeout but not on browser timer
+  // handles, so the debounce timer never keeps a Node process alive.
+  scrollDrainTimer.unref?.()
+}
+
+/** True while scroll is actively draining (within 150ms of last event).
+ *  Intervals should early-return when this is set — the work picks up next
+ *  tick after scroll settles. */
+export function getIsScrollDraining(): boolean {
+  return scrollDraining
+}
+
+/** Await this before expensive one-shot work (network, subprocess) that could
+ *  coincide with scroll. Resolves immediately if not scrolling; otherwise
+ *  polls at the idle interval until the flag clears. */
+export async function waitForScrollIdle(): Promise<void> {
+  while (scrollDraining) {
+    // bootstrap-isolation forbids importing sleep() from src/utils/
+    // (the inline timer is unref'd for the same reason as above)
+    // eslint-disable-next-line no-restricted-syntax
+    await new Promise(r => setTimeout(r, SCROLL_DRAIN_IDLE_MS).unref?.())
+  }
+}
+
+/** Live reference (not a copy) to the per-model usage map. */
+export function getModelUsage(): { [modelName: string]: ModelUsage } {
+  const { modelUsage } = STATE
+  return modelUsage
+}
+
+/** Usage entry for one model, or undefined if that model wasn't used. */
+export function getUsageForModel(model: string): ModelUsage | undefined {
+  const { modelUsage } = STATE
+  return modelUsage[model]
+}
+
+/**
+ * Gets the model override set from the --model CLI flag or after the user
+ * updates their configured model.
+ */
+export function getMainLoopModelOverride(): ModelSetting | undefined {
+  const { mainLoopModelOverride } = STATE
+  return mainLoopModelOverride
+}
+
+/** Model the main loop started the session with. */
+export function getInitialMainLoopModel(): ModelSetting {
+  const { initialMainLoopModel } = STATE
+  return initialMainLoopModel
+}
+
+/** Set (or clear, with undefined) the main-loop model override. */
+export function setMainLoopModelOverride(
+  model: ModelSetting | undefined,
+): void {
+  STATE.mainLoopModelOverride = model
+}
+
+/** Record the model the main loop started with. */
+export function setInitialMainLoopModel(model: ModelSetting): void {
+  STATE.initialMainLoopModel = model
+}
+
+/** SDK-provided beta flags, if any were passed in. */
+export function getSdkBetas(): string[] | undefined {
+  const { sdkBetas } = STATE
+  return sdkBetas
+}
+
+/** Set (or clear, with undefined) the SDK-provided beta flags. */
+export function setSdkBetas(betas: string[] | undefined): void {
+  STATE.sdkBetas = betas
+}
+
+export function resetCostState(): void {
+  STATE.totalCostUSD = 0
+  STATE.totalAPIDuration = 0
+  STATE.totalAPIDurationWithoutRetries = 0
+  STATE.totalToolDuration = 0
+  STATE.startTime = Date.now()
+  STATE.totalLinesAdded = 0
+  STATE.totalLinesRemoved = 0
+  STATE.hasUnknownModelCost = false
+  STATE.modelUsage = {}
+  STATE.promptId = null
+}
+
+/**
+ * Sets cost state values for session restore.
+ * Called by restoreCostStateForSession in cost-tracker.ts.
+ */
+export function setCostStateForRestore({
+  totalCostUSD,
+  totalAPIDuration,
+  totalAPIDurationWithoutRetries,
+  totalToolDuration,
+  totalLinesAdded,
+  totalLinesRemoved,
+  lastDuration,
+  modelUsage,
+}: {
+  totalCostUSD: number
+  totalAPIDuration: number
+  totalAPIDurationWithoutRetries: number
+  totalToolDuration: number
+  totalLinesAdded: number
+  totalLinesRemoved: number
+  lastDuration: number | undefined
+  modelUsage: { [modelName: string]: ModelUsage } | undefined
+}): void {
+  STATE.totalCostUSD = totalCostUSD
+  STATE.totalAPIDuration = totalAPIDuration
+  STATE.totalAPIDurationWithoutRetries = totalAPIDurationWithoutRetries
+  STATE.totalToolDuration = totalToolDuration
+  STATE.totalLinesAdded = totalLinesAdded
+  STATE.totalLinesRemoved = totalLinesRemoved
+
+  // Restore per-model usage breakdown
+  if (modelUsage) {
+    STATE.modelUsage = modelUsage
+  }
+
+  // Adjust startTime to make wall duration accumulate
+  if (lastDuration) {
+    STATE.startTime = Date.now() - lastDuration
+  }
+}
+
+// Only used in tests
+export function resetStateForTests(): void {
+  if (process.env.NODE_ENV !== 'test') {
+    throw new Error('resetStateForTests can only be called in tests')
+  }
+  Object.entries(getInitialState()).forEach(([key, value]) => {
+    STATE[key as keyof State] = value as never
+  })
+  outputTokensAtTurnStart = 0
+  currentTurnTokenBudget = null
+  budgetContinuationCount = 0
+  sessionSwitched.clear()
+}
+
// You shouldn't use this directly. See src/utils/model/modelStrings.ts::getModelStrings()
// Returns null until setModelStrings() has been called.
export function getModelStrings(): ModelStrings | null {
  return STATE.modelStrings
}

// You shouldn't use this directly. See src/utils/model/modelStrings.ts
export function setModelStrings(modelStrings: ModelStrings): void {
  STATE.modelStrings = modelStrings
}

// Test utility function to reset model strings for re-initialization.
// Separate from setModelStrings because we only want to accept 'null' in tests.
export function resetModelStringsForTestingOnly() {
  STATE.modelStrings = null
}
+
/**
 * Installs the OpenTelemetry meter and builds every session counter through
 * the injected factory. Until this runs, all counter accessors return null.
 * The factory is injected (rather than calling meter.createCounter directly)
 * so callers control how AttributedCounter wraps the raw counter.
 */
export function setMeter(
  meter: Meter,
  createCounter: (name: string, options: MetricOptions) => AttributedCounter,
): void {
  STATE.meter = meter

  // Initialize all counters using the provided factory
  STATE.sessionCounter = createCounter('claude_code.session.count', {
    description: 'Count of CLI sessions started',
  })
  STATE.locCounter = createCounter('claude_code.lines_of_code.count', {
    description:
      "Count of lines of code modified, with the 'type' attribute indicating whether lines were added or removed",
  })
  STATE.prCounter = createCounter('claude_code.pull_request.count', {
    description: 'Number of pull requests created',
  })
  STATE.commitCounter = createCounter('claude_code.commit.count', {
    description: 'Number of git commits created',
  })
  STATE.costCounter = createCounter('claude_code.cost.usage', {
    description: 'Cost of the Claude Code session',
    unit: 'USD',
  })
  STATE.tokenCounter = createCounter('claude_code.token.usage', {
    description: 'Number of tokens used',
    unit: 'tokens',
  })
  STATE.codeEditToolDecisionCounter = createCounter(
    'claude_code.code_edit_tool.decision',
    {
      description:
        'Count of code editing tool permission decisions (accept/reject) for Edit, Write, and NotebookEdit tools',
    },
  )
  STATE.activeTimeCounter = createCounter('claude_code.active_time.total', {
    description: 'Total active time in seconds',
    unit: 's',
  })
}
+
// Telemetry accessors. Each returns null until setMeter() has installed the
// meter and created the corresponding counter.

export function getMeter(): Meter | null {
  return STATE.meter
}

export function getSessionCounter(): AttributedCounter | null {
  return STATE.sessionCounter
}

export function getLocCounter(): AttributedCounter | null {
  return STATE.locCounter
}

export function getPrCounter(): AttributedCounter | null {
  return STATE.prCounter
}

export function getCommitCounter(): AttributedCounter | null {
  return STATE.commitCounter
}

export function getCostCounter(): AttributedCounter | null {
  return STATE.costCounter
}

export function getTokenCounter(): AttributedCounter | null {
  return STATE.tokenCounter
}

export function getCodeEditToolDecisionCounter(): AttributedCounter | null {
  return STATE.codeEditToolDecisionCounter
}

export function getActiveTimeCounter(): AttributedCounter | null {
  return STATE.activeTimeCounter
}
+
// OpenTelemetry provider/logger accessors. All default to null and are set
// during telemetry initialization; pass null to the setters to tear down.

export function getLoggerProvider(): LoggerProvider | null {
  return STATE.loggerProvider
}

export function setLoggerProvider(provider: LoggerProvider | null): void {
  STATE.loggerProvider = provider
}

export function getEventLogger(): ReturnType<typeof logs.getLogger> | null {
  return STATE.eventLogger
}

export function setEventLogger(
  logger: ReturnType<typeof logs.getLogger> | null,
): void {
  STATE.eventLogger = logger
}

export function getMeterProvider(): MeterProvider | null {
  return STATE.meterProvider
}

export function setMeterProvider(provider: MeterProvider | null): void {
  STATE.meterProvider = provider
}

export function getTracerProvider(): BasicTracerProvider | null {
  return STATE.tracerProvider
}

export function setTracerProvider(provider: BasicTracerProvider | null): void {
  STATE.tracerProvider = provider
}
+
/** Convenience inverse of getIsInteractive(). */
export function getIsNonInteractiveSession(): boolean {
  return !STATE.isInteractive
}

export function getIsInteractive(): boolean {
  return STATE.isInteractive
}

export function setIsInteractive(value: boolean): void {
  STATE.isInteractive = value
}

/** Identifier of the client driving this session (e.g. 'claude-vscode' is
 *  checked in preferThirdPartyAuthentication below). */
export function getClientType(): string {
  return STATE.clientType
}

export function setClientType(type: string): void {
  STATE.clientType = type
}
+
// Simple boolean feature toggles held in session state.

export function getSdkAgentProgressSummariesEnabled(): boolean {
  return STATE.sdkAgentProgressSummariesEnabled
}

export function setSdkAgentProgressSummariesEnabled(value: boolean): void {
  STATE.sdkAgentProgressSummariesEnabled = value
}

export function getKairosActive(): boolean {
  return STATE.kairosActive
}

export function setKairosActive(value: boolean): void {
  STATE.kairosActive = value
}

export function getStrictToolResultPairing(): boolean {
  return STATE.strictToolResultPairing
}

export function setStrictToolResultPairing(value: boolean): void {
  STATE.strictToolResultPairing = value
}

// Field name 'userMsgOptIn' avoids excluded-string substrings ('BriefTool',
// 'SendUserMessage' — case-insensitive). All callers are inside feature()
// guards so these accessors don't need their own (matches getKairosActive).
export function getUserMsgOptIn(): boolean {
  return STATE.userMsgOptIn
}

export function setUserMsgOptIn(value: boolean): void {
  STATE.userMsgOptIn = value
}
+
/** Where this session originated; undefined until setSessionSource runs. */
export function getSessionSource(): string | undefined {
  return STATE.sessionSource
}

export function setSessionSource(source: string): void {
  STATE.sessionSource = source
}

/** Preferred rendering format for question previews; undefined = unset. */
export function getQuestionPreviewFormat(): 'markdown' | 'html' | undefined {
  return STATE.questionPreviewFormat
}

export function setQuestionPreviewFormat(format: 'markdown' | 'html'): void {
  STATE.questionPreviewFormat = format
}

/** Live map of agent id → assigned color. Callers mutate this map directly. */
export function getAgentColorMap(): Map<string, AgentColorName> {
  return STATE.agentColorMap
}

/** Path to a settings file supplied via CLI flag, if any. */
export function getFlagSettingsPath(): string | undefined {
  return STATE.flagSettingsPath
}

export function setFlagSettingsPath(path: string | undefined): void {
  STATE.flagSettingsPath = path
}

/** Settings supplied inline via CLI flag (already parsed), or null. */
export function getFlagSettingsInline(): Record<string, unknown> | null {
  return STATE.flagSettingsInline
}

export function setFlagSettingsInline(
  settings: Record<string, unknown> | null,
): void {
  STATE.flagSettingsInline = settings
}
+
// Credential accessors. For all three: undefined = never set this session,
// null = explicitly cleared/absent. The 'FromFd' names suggest values read
// from an inherited file descriptor at startup — confirm at the call sites.

export function getSessionIngressToken(): string | null | undefined {
  return STATE.sessionIngressToken
}

export function setSessionIngressToken(token: string | null): void {
  STATE.sessionIngressToken = token
}

export function getOauthTokenFromFd(): string | null | undefined {
  return STATE.oauthTokenFromFd
}

export function setOauthTokenFromFd(token: string | null): void {
  STATE.oauthTokenFromFd = token
}

export function getApiKeyFromFd(): string | null | undefined {
  return STATE.apiKeyFromFd
}

export function setApiKeyFromFd(key: string | null): void {
  STATE.apiKeyFromFd = key
}
+
// Debug snapshots of the most recent API traffic. Request params and the
// message list are stored separately (messages can be large).

export function setLastAPIRequest(
  params: Omit<BetaMessageStreamParams, 'messages'> | null,
): void {
  STATE.lastAPIRequest = params
}

export function getLastAPIRequest(): Omit<
  BetaMessageStreamParams,
  'messages'
> | null {
  return STATE.lastAPIRequest
}

export function setLastAPIRequestMessages(
  messages: BetaMessageStreamParams['messages'] | null,
): void {
  STATE.lastAPIRequestMessages = messages
}

export function getLastAPIRequestMessages():
  | BetaMessageStreamParams['messages']
  | null {
  return STATE.lastAPIRequestMessages
}

export function setLastClassifierRequests(requests: unknown[] | null): void {
  STATE.lastClassifierRequests = requests
}

export function getLastClassifierRequests(): unknown[] | null {
  return STATE.lastClassifierRequests
}

/** Cached CLAUDE.md content; null when not cached. */
export function setCachedClaudeMdContent(content: string | null): void {
  STATE.cachedClaudeMdContent = content
}

export function getCachedClaudeMdContent(): string | null {
  return STATE.cachedClaudeMdContent
}
+
+export function addToInMemoryErrorLog(errorInfo: {
+  error: string
+  timestamp: string
+}): void {
+  const MAX_IN_MEMORY_ERRORS = 100
+  if (STATE.inMemoryErrorLog.length >= MAX_IN_MEMORY_ERRORS) {
+    STATE.inMemoryErrorLog.shift() // Remove oldest error
+  }
+  STATE.inMemoryErrorLog.push(errorInfo)
+}
+
export function getAllowedSettingSources(): SettingSource[] {
  return STATE.allowedSettingSources
}

export function setAllowedSettingSources(sources: SettingSource[]): void {
  STATE.allowedSettingSources = sources
}

/** True only for non-interactive sessions whose client is not the VS Code
 *  extension. */
export function preferThirdPartyAuthentication(): boolean {
  // IDE extension should behave as 1P for authentication reasons.
  return getIsNonInteractiveSession() && STATE.clientType !== 'claude-vscode'
}

/** Plugins supplied inline (e.g. via CLI), replacing any previous list. */
export function setInlinePlugins(plugins: Array<string>): void {
  STATE.inlinePlugins = plugins
}

export function getInlinePlugins(): Array<string> {
  return STATE.inlinePlugins
}
+
// Tri-state override for the chrome flag: undefined = no override.
export function setChromeFlagOverride(value: boolean | undefined): void {
  STATE.chromeFlagOverride = value
}

export function getChromeFlagOverride(): boolean | undefined {
  return STATE.chromeFlagOverride
}

/** Toggles cowork plugins. Invalidates the settings cache because the flag
 *  affects which settings apply. */
export function setUseCoworkPlugins(value: boolean): void {
  STATE.useCoworkPlugins = value
  resetSettingsCache()
}

export function getUseCoworkPlugins(): boolean {
  return STATE.useCoworkPlugins
}

// Session-scoped bypass-permissions mode (not persisted via these accessors).
export function setSessionBypassPermissionsMode(enabled: boolean): void {
  STATE.sessionBypassPermissionsMode = enabled
}

export function getSessionBypassPermissionsMode(): boolean {
  return STATE.sessionBypassPermissionsMode
}

export function setScheduledTasksEnabled(enabled: boolean): void {
  STATE.scheduledTasksEnabled = enabled
}

export function getScheduledTasksEnabled(): boolean {
  return STATE.scheduledTasksEnabled
}
+
/** A cron-scheduled prompt that lives only for this session. */
export type SessionCronTask = {
  // Unique task id, used for removal (see removeSessionCronTasks).
  id: string
  // Cron expression controlling when the task fires.
  cron: string
  // Prompt text delivered when the task fires.
  prompt: string
  // Creation timestamp (epoch ms).
  createdAt: number
  recurring?: boolean
  /**
   * When set, the task was created by an in-process teammate (not the team lead).
   * The scheduler routes fires to that teammate's pendingUserMessages queue
   * instead of the main REPL command queue. Session-only — never written to disk.
   */
  agentId?: string
}

/** Live task list — callers should treat it as read-only; mutate via
 *  addSessionCronTask / removeSessionCronTasks. */
export function getSessionCronTasks(): SessionCronTask[] {
  return STATE.sessionCronTasks
}

export function addSessionCronTask(task: SessionCronTask): void {
  STATE.sessionCronTasks.push(task)
}
+
+/**
+ * Returns the number of tasks actually removed. Callers use this to skip
+ * downstream work (e.g. the disk read in removeCronTasks) when all ids
+ * were accounted for here.
+ */
+export function removeSessionCronTasks(ids: readonly string[]): number {
+  if (ids.length === 0) return 0
+  const idSet = new Set(ids)
+  const remaining = STATE.sessionCronTasks.filter(t => !idSet.has(t.id))
+  const removed = STATE.sessionCronTasks.length - remaining.length
+  if (removed === 0) return 0
+  STATE.sessionCronTasks = remaining
+  return removed
+}
+
export function setSessionTrustAccepted(accepted: boolean): void {
  STATE.sessionTrustAccepted = accepted
}

export function getSessionTrustAccepted(): boolean {
  return STATE.sessionTrustAccepted
}

export function setSessionPersistenceDisabled(disabled: boolean): void {
  STATE.sessionPersistenceDisabled = disabled
}

export function isSessionPersistenceDisabled(): boolean {
  return STATE.sessionPersistenceDisabled
}

/** Whether the user has exited plan mode at least once this session. */
export function hasExitedPlanModeInSession(): boolean {
  return STATE.hasExitedPlanMode
}

export function setHasExitedPlanMode(value: boolean): void {
  STATE.hasExitedPlanMode = value
}

/** Whether a plan_mode_exit attachment is pending; managed by
 *  handlePlanModeTransition. */
export function needsPlanModeExitAttachment(): boolean {
  return STATE.needsPlanModeExitAttachment
}

export function setNeedsPlanModeExitAttachment(value: boolean): void {
  STATE.needsPlanModeExitAttachment = value
}
+
+export function handlePlanModeTransition(
+  fromMode: string,
+  toMode: string,
+): void {
+  // If switching TO plan mode, clear any pending exit attachment
+  // This prevents sending both plan_mode and plan_mode_exit when user toggles quickly
+  if (toMode === 'plan' && fromMode !== 'plan') {
+    STATE.needsPlanModeExitAttachment = false
+  }
+
+  // If switching out of plan mode, trigger the plan_mode_exit attachment
+  if (fromMode === 'plan' && toMode !== 'plan') {
+    STATE.needsPlanModeExitAttachment = true
+  }
+}
+
/** Whether an auto_mode_exit attachment is pending; managed by
 *  handleAutoModeTransition. */
export function needsAutoModeExitAttachment(): boolean {
  return STATE.needsAutoModeExitAttachment
}

export function setNeedsAutoModeExitAttachment(value: boolean): void {
  STATE.needsAutoModeExitAttachment = value
}
+
+export function handleAutoModeTransition(
+  fromMode: string,
+  toMode: string,
+): void {
+  // Auto↔plan transitions are handled by prepareContextForPlanMode (auto may
+  // stay active through plan if opted in) and ExitPlanMode (restores mode).
+  // Skip both directions so this function only handles direct auto transitions.
+  if (
+    (fromMode === 'auto' && toMode === 'plan') ||
+    (fromMode === 'plan' && toMode === 'auto')
+  ) {
+    return
+  }
+  const fromIsAuto = fromMode === 'auto'
+  const toIsAuto = toMode === 'auto'
+
+  // If switching TO auto mode, clear any pending exit attachment
+  // This prevents sending both auto_mode and auto_mode_exit when user toggles quickly
+  if (toIsAuto && !fromIsAuto) {
+    STATE.needsAutoModeExitAttachment = false
+  }
+
+  // If switching out of auto mode, trigger the auto_mode_exit attachment
+  if (fromIsAuto && !toIsAuto) {
+    STATE.needsAutoModeExitAttachment = true
+  }
+}
+
// LSP plugin recommendation session tracking — ensures the recommendation is
// surfaced at most once per session.
export function hasShownLspRecommendationThisSession(): boolean {
  return STATE.lspRecommendationShownThisSession
}

export function setLspRecommendationShownThisSession(value: boolean): void {
  STATE.lspRecommendationShownThisSession = value
}

// SDK init event state
export function setInitJsonSchema(schema: Record<string, unknown>): void {
  STATE.initJsonSchema = schema
}

/** JSON schema supplied at SDK init, or null before/after resetSdkInitState. */
export function getInitJsonSchema(): Record<string, unknown> | null {
  return STATE.initJsonSchema
}
+
+export function registerHookCallbacks(
+  hooks: Partial<Record<HookEvent, RegisteredHookMatcher[]>>,
+): void {
+  if (!STATE.registeredHooks) {
+    STATE.registeredHooks = {}
+  }
+
+  // `registerHookCallbacks` may be called multiple times, so we need to merge (not overwrite)
+  for (const [event, matchers] of Object.entries(hooks)) {
+    const eventKey = event as HookEvent
+    if (!STATE.registeredHooks[eventKey]) {
+      STATE.registeredHooks[eventKey] = []
+    }
+    STATE.registeredHooks[eventKey]!.push(...matchers)
+  }
+}
+
/** The merged hook table, or null when nothing is registered. */
export function getRegisteredHooks(): Partial<
  Record<HookEvent, RegisteredHookMatcher[]>
> | null {
  return STATE.registeredHooks
}

/** Drops ALL registered hooks (plugin and callback alike). */
export function clearRegisteredHooks(): void {
  STATE.registeredHooks = null
}
+
+export function clearRegisteredPluginHooks(): void {
+  if (!STATE.registeredHooks) {
+    return
+  }
+
+  const filtered: Partial<Record<HookEvent, RegisteredHookMatcher[]>> = {}
+  for (const [event, matchers] of Object.entries(STATE.registeredHooks)) {
+    // Keep only callback hooks (those without pluginRoot)
+    const callbackHooks = matchers.filter(m => !('pluginRoot' in m))
+    if (callbackHooks.length > 0) {
+      filtered[event as HookEvent] = callbackHooks
+    }
+  }
+
+  STATE.registeredHooks = Object.keys(filtered).length > 0 ? filtered : null
+}
+
/** Clears SDK init state: the init JSON schema and every registered hook. */
export function resetSdkInitState(): void {
  STATE.initJsonSchema = null
  STATE.registeredHooks = null
}

/** Live cache of plan slugs; callers read and write this map directly. */
export function getPlanSlugCache(): Map<string, string> {
  return STATE.planSlugCache
}

/** Live set of team ids created during this session. */
export function getSessionCreatedTeams(): Set<string> {
  return STATE.sessionCreatedTeams
}
+
// Teleported session tracking for reliability logging

/** Marks this session as teleported; first-message logging starts false. */
export function setTeleportedSessionInfo(info: {
  sessionId: string | null
}): void {
  STATE.teleportedSessionInfo = {
    isTeleported: true,
    hasLoggedFirstMessage: false,
    sessionId: info.sessionId,
  }
}

/** Teleport metadata, or null for a non-teleported session. */
export function getTeleportedSessionInfo(): {
  isTeleported: boolean
  hasLoggedFirstMessage: boolean
  sessionId: string | null
} | null {
  return STATE.teleportedSessionInfo
}

/** No-op when the session is not teleported. */
export function markFirstTeleportMessageLogged(): void {
  if (STATE.teleportedSessionInfo) {
    STATE.teleportedSessionInfo.hasLoggedFirstMessage = true
  }
}
+
// Invoked skills tracking for preservation across compaction
export type InvokedSkillInfo = {
  skillName: string
  skillPath: string
  content: string
  // Epoch ms of the invocation (set by addInvokedSkill).
  invokedAt: number
  // null for main-thread invocations; otherwise the invoking agent's id.
  agentId: string | null
}

/**
 * Records a skill invocation, keyed by `${agentId}:${skillName}` so the same
 * skill invoked by different agents is tracked separately. Re-invoking the
 * same skill for the same agent overwrites the prior entry.
 */
export function addInvokedSkill(
  skillName: string,
  skillPath: string,
  content: string,
  agentId: string | null = null,
): void {
  const key = `${agentId ?? ''}:${skillName}`
  STATE.invokedSkills.set(key, {
    skillName,
    skillPath,
    content,
    invokedAt: Date.now(),
    agentId,
  })
}

/** The live invoked-skills map (not a copy). */
export function getInvokedSkills(): Map<string, InvokedSkillInfo> {
  return STATE.invokedSkills
}
+
+export function getInvokedSkillsForAgent(
+  agentId: string | undefined | null,
+): Map<string, InvokedSkillInfo> {
+  const normalizedId = agentId ?? null
+  const filtered = new Map<string, InvokedSkillInfo>()
+  for (const [key, skill] of STATE.invokedSkills) {
+    if (skill.agentId === normalizedId) {
+      filtered.set(key, skill)
+    }
+  }
+  return filtered
+}
+
+export function clearInvokedSkills(
+  preservedAgentIds?: ReadonlySet<string>,
+): void {
+  if (!preservedAgentIds || preservedAgentIds.size === 0) {
+    STATE.invokedSkills.clear()
+    return
+  }
+  for (const [key, skill] of STATE.invokedSkills) {
+    if (skill.agentId === null || !preservedAgentIds.has(skill.agentId)) {
+      STATE.invokedSkills.delete(key)
+    }
+  }
+}
+
+export function clearInvokedSkillsForAgent(agentId: string): void {
+  for (const [key, skill] of STATE.invokedSkills) {
+    if (skill.agentId === agentId) {
+      STATE.invokedSkills.delete(key)
+    }
+  }
+}
+
+// Slow operations tracking for dev bar
+const MAX_SLOW_OPERATIONS = 10
+const SLOW_OPERATION_TTL_MS = 10000
+
+export function addSlowOperation(operation: string, durationMs: number): void {
+  if (process.env.USER_TYPE !== 'ant') return
+  // Skip tracking for editor sessions (user editing a prompt file in $EDITOR)
+  // These are intentionally slow since the user is drafting text
+  if (operation.includes('exec') && operation.includes('claude-prompt-')) {
+    return
+  }
+  const now = Date.now()
+  // Remove stale operations
+  STATE.slowOperations = STATE.slowOperations.filter(
+    op => now - op.timestamp < SLOW_OPERATION_TTL_MS,
+  )
+  // Add new operation
+  STATE.slowOperations.push({ operation, durationMs, timestamp: now })
+  // Keep only the most recent operations
+  if (STATE.slowOperations.length > MAX_SLOW_OPERATIONS) {
+    STATE.slowOperations = STATE.slowOperations.slice(-MAX_SLOW_OPERATIONS)
+  }
+}
+
// Shared frozen-by-convention sentinel so repeated "empty" polls return the
// exact same reference (lets React's Object.is bailout work).
const EMPTY_SLOW_OPERATIONS: ReadonlyArray<{
  operation: string
  durationMs: number
  timestamp: number
}> = []

/**
 * Returns the current slow-operation list with expired entries pruned.
 * Reference stability is the point: the same array object is returned across
 * polls unless an entry actually expired, so callers can cheaply skip
 * re-renders.
 */
export function getSlowOperations(): ReadonlyArray<{
  operation: string
  durationMs: number
  timestamp: number
}> {
  // Most common case: nothing tracked. Return a stable reference so the
  // caller's setState() can bail via Object.is instead of re-rendering at 2fps.
  if (STATE.slowOperations.length === 0) {
    return EMPTY_SLOW_OPERATIONS
  }
  const now = Date.now()
  // Only allocate a new array when something actually expired; otherwise keep
  // the reference stable across polls while ops are still fresh.
  if (
    STATE.slowOperations.some(op => now - op.timestamp >= SLOW_OPERATION_TTL_MS)
  ) {
    STATE.slowOperations = STATE.slowOperations.filter(
      op => now - op.timestamp < SLOW_OPERATION_TTL_MS,
    )
    if (STATE.slowOperations.length === 0) {
      return EMPTY_SLOW_OPERATIONS
    }
  }
  // Safe to return directly: addSlowOperation() reassigns STATE.slowOperations
  // before pushing, so the array held in React state is never mutated.
  return STATE.slowOperations
}
+
export function getMainThreadAgentType(): string | undefined {
  return STATE.mainThreadAgentType
}

export function setMainThreadAgentType(agentType: string | undefined): void {
  STATE.mainThreadAgentType = agentType
}

export function getIsRemoteMode(): boolean {
  return STATE.isRemoteMode
}

export function setIsRemoteMode(value: boolean): void {
  STATE.isRemoteMode = value
}

// System prompt section accessors

/** Live cache of system-prompt sections by name; null values are cached too
 *  (a looked-up-but-absent section). */
export function getSystemPromptSectionCache(): Map<string, string | null> {
  return STATE.systemPromptSectionCache
}

export function setSystemPromptSectionCacheEntry(
  name: string,
  value: string | null,
): void {
  STATE.systemPromptSectionCache.set(name, value)
}

export function clearSystemPromptSectionState(): void {
  STATE.systemPromptSectionCache.clear()
}

// Last emitted date accessors (for detecting midnight date changes)

export function getLastEmittedDate(): string | null {
  return STATE.lastEmittedDate
}

export function setLastEmittedDate(date: string | null): void {
  STATE.lastEmittedDate = date
}
+}
+
export function getAdditionalDirectoriesForClaudeMd(): string[] {
  return STATE.additionalDirectoriesForClaudeMd
}

export function setAdditionalDirectoriesForClaudeMd(
  directories: string[],
): void {
  STATE.additionalDirectoriesForClaudeMd = directories
}

export function getAllowedChannels(): ChannelEntry[] {
  return STATE.allowedChannels
}

export function setAllowedChannels(entries: ChannelEntry[]): void {
  STATE.allowedChannels = entries
}

export function getHasDevChannels(): boolean {
  return STATE.hasDevChannels
}

export function setHasDevChannels(value: boolean): void {
  STATE.hasDevChannels = value
}

// 1-hour prompt-cache state. For both values, null = not yet determined.

export function getPromptCache1hAllowlist(): string[] | null {
  return STATE.promptCache1hAllowlist
}

export function setPromptCache1hAllowlist(allowlist: string[] | null): void {
  STATE.promptCache1hAllowlist = allowlist
}

export function getPromptCache1hEligible(): boolean | null {
  return STATE.promptCache1hEligible
}

export function setPromptCache1hEligible(eligible: boolean | null): void {
  STATE.promptCache1hEligible = eligible
}
+}
+
// Beta-header latches. Tri-state: null = not yet evaluated for this
// conversation; reset via clearBetaHeaderLatches below.

export function getAfkModeHeaderLatched(): boolean | null {
  return STATE.afkModeHeaderLatched
}

export function setAfkModeHeaderLatched(v: boolean): void {
  STATE.afkModeHeaderLatched = v
}

export function getFastModeHeaderLatched(): boolean | null {
  return STATE.fastModeHeaderLatched
}

export function setFastModeHeaderLatched(v: boolean): void {
  STATE.fastModeHeaderLatched = v
}

export function getCacheEditingHeaderLatched(): boolean | null {
  return STATE.cacheEditingHeaderLatched
}

export function setCacheEditingHeaderLatched(v: boolean): void {
  STATE.cacheEditingHeaderLatched = v
}

export function getThinkingClearLatched(): boolean | null {
  return STATE.thinkingClearLatched
}

export function setThinkingClearLatched(v: boolean): void {
  STATE.thinkingClearLatched = v
}
+
+/**
+ * Reset beta header latches to null. Called on /clear and /compact so a
+ * fresh conversation gets fresh header evaluation.
+ */
+export function clearBetaHeaderLatches(): void {
+  STATE.afkModeHeaderLatched = null
+  STATE.fastModeHeaderLatched = null
+  STATE.cacheEditingHeaderLatched = null
+  STATE.thinkingClearLatched = null
+}
+
/** Id of the prompt currently in flight, or null (also cleared by
 *  resetCostState). */
export function getPromptId(): string | null {
  return STATE.promptId
}

export function setPromptId(id: string | null): void {
  STATE.promptId = id
}
+

+ 539 - 0
src/bridge/bridgeApi.ts

@@ -0,0 +1,539 @@
+import axios from 'axios'
+
+import { debugBody, extractErrorDetail } from './debugUtils.js'
+import {
+  BRIDGE_LOGIN_INSTRUCTION,
+  type BridgeApiClient,
+  type BridgeConfig,
+  type PermissionResponseEvent,
+  type WorkResponse,
+} from './types.js'
+
/**
 * Constructor-injected dependencies for createBridgeApiClient. Everything the
 * client needs from the wider app arrives through here, which keeps this
 * module import-light (see the onAuth401 note below for why that matters).
 */
type BridgeApiDeps = {
  /** Origin prefixed to every `/v1/...` request path. */
  baseUrl: string
  /** Current OAuth access token, or undefined when not logged in. */
  getAccessToken: () => string | undefined
  /** Sent as the x-environment-runner-version header on every request. */
  runnerVersion: string
  /** Optional sink for `[bridge:api]` trace lines; silent when omitted. */
  onDebug?: (msg: string) => void
  /**
   * Called on 401 to attempt OAuth token refresh. Returns true if refreshed,
   * in which case the request is retried once. Injected because
   * handleOAuth401Error from utils/auth.ts transitively pulls in config.ts →
   * file.ts → permissions/filesystem.ts → sessionStorage.ts → commands.ts
   * (~1300 modules). Daemon callers using env-var tokens omit this — their
   * tokens don't refresh, so 401 goes straight to BridgeFatalError.
   */
  onAuth401?: (staleAccessToken: string) => Promise<boolean>
  /**
   * Returns the trusted device token to send as X-Trusted-Device-Token on
   * bridge API calls. Bridge sessions have SecurityTier=ELEVATED on the
   * server (CCR v2); when the server's enforcement flag is on,
   * ConnectBridgeWorker requires a trusted device at JWT-issuance.
   * Optional — when absent or returning undefined, the header is omitted
   * and the server falls through to its flag-off/no-op path. The CLI-side
   * gate is tengu_sessions_elevated_auth_enforcement (see trustedDevice.ts).
   */
  getTrustedDeviceToken?: () => string | undefined
}

/** Value of the `anthropic-beta` header sent on every bridge API request. */
const BETA_HEADER = 'environments-2025-11-01'
+
+/** Allowlist pattern for server-provided IDs used in URL path segments. */
+const SAFE_ID_PATTERN = /^[a-zA-Z0-9_-]+$/
+
+/**
+ * Validate that a server-provided ID is safe to interpolate into a URL path.
+ * Prevents path traversal (e.g. `../../admin`) and injection via IDs that
+ * contain slashes, dots, or other special characters.
+ */
+export function validateBridgeId(id: string, label: string): string {
+  if (!id || !SAFE_ID_PATTERN.test(id)) {
+    throw new Error(`Invalid ${label}: contains unsafe characters`)
+  }
+  return id
+}
+
+/** Fatal bridge errors that should not be retried (e.g. auth failures). */
+export class BridgeFatalError extends Error {
+  readonly status: number
+  /** Server-provided error type, e.g. "environment_expired". */
+  readonly errorType: string | undefined
+  constructor(message: string, status: number, errorType?: string) {
+    super(message)
+    this.name = 'BridgeFatalError'
+    this.status = status
+    this.errorType = errorType
+  }
+}
+
/**
 * Build the concrete BridgeApiClient used by bridge mode. Every method hits a
 * `${deps.baseUrl}/v1/...` endpoint with axios (validateStatus < 500, so only
 * 5xx/network errors reject), validates server-provided path IDs, and maps
 * non-success statuses to exceptions via handleErrorStatus (below).
 */
export function createBridgeApiClient(deps: BridgeApiDeps): BridgeApiClient {
  // Forward to the injected sink when present; no-op otherwise.
  function debug(msg: string): void {
    deps.onDebug?.(msg)
  }

  // Empty-poll streak counter — only used to throttle "no work" debug lines
  // to the 1st and every 100th consecutive empty response.
  let consecutiveEmptyPolls = 0
  const EMPTY_POLL_LOG_INTERVAL = 100

  /** Common headers for every bridge request, keyed off the given bearer token. */
  function getHeaders(accessToken: string): Record<string, string> {
    const headers: Record<string, string> = {
      Authorization: `Bearer ${accessToken}`,
      'Content-Type': 'application/json',
      'anthropic-version': '2023-06-01',
      'anthropic-beta': BETA_HEADER,
      'x-environment-runner-version': deps.runnerVersion,
    }
    const deviceToken = deps.getTrustedDeviceToken?.()
    if (deviceToken) {
      headers['X-Trusted-Device-Token'] = deviceToken
    }
    return headers
  }

  /** Resolve the current OAuth access token, or throw the login instruction. */
  function resolveAuth(): string {
    const accessToken = deps.getAccessToken()
    if (!accessToken) {
      throw new Error(BRIDGE_LOGIN_INSTRUCTION)
    }
    return accessToken
  }

  /**
   * Execute an OAuth-authenticated request with a single retry on 401.
   * On 401, attempts token refresh via handleOAuth401Error (same pattern as
   * withRetry.ts for v1/messages). If refresh succeeds, retries the request
   * once with the new token. If refresh fails or the retry also returns 401,
   * the 401 response is returned for handleErrorStatus to throw BridgeFatalError.
   */
  async function withOAuthRetry<T>(
    fn: (accessToken: string) => Promise<{ status: number; data: T }>,
    context: string,
  ): Promise<{ status: number; data: T }> {
    const accessToken = resolveAuth()
    const response = await fn(accessToken)

    if (response.status !== 401) {
      return response
    }

    if (!deps.onAuth401) {
      debug(`[bridge:api] ${context}: 401 received, no refresh handler`)
      return response
    }

    // Attempt token refresh — matches the pattern in withRetry.ts
    debug(`[bridge:api] ${context}: 401 received, attempting token refresh`)
    const refreshed = await deps.onAuth401(accessToken)
    if (refreshed) {
      debug(`[bridge:api] ${context}: Token refreshed, retrying request`)
      const newToken = resolveAuth()
      const retryResponse = await fn(newToken)
      if (retryResponse.status !== 401) {
        return retryResponse
      }
      debug(`[bridge:api] ${context}: Retry after refresh also got 401`)
    } else {
      debug(`[bridge:api] ${context}: Token refresh failed`)
    }

    // Refresh failed — return 401 for handleErrorStatus to throw
    return response
  }

  return {
    /** Register (or idempotently re-register) this machine as a bridge environment. */
    async registerBridgeEnvironment(
      config: BridgeConfig,
    ): Promise<{ environment_id: string; environment_secret: string }> {
      debug(
        `[bridge:api] POST /v1/environments/bridge bridgeId=${config.bridgeId}`,
      )

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post<{
            environment_id: string
            environment_secret: string
          }>(
            `${deps.baseUrl}/v1/environments/bridge`,
            {
              machine_name: config.machineName,
              directory: config.dir,
              branch: config.branch,
              git_repo_url: config.gitRepoUrl,
              // Advertise session capacity so claude.ai/code can show
              // "2/4 sessions" badges and only block the picker when
              // actually at capacity. Backends that don't yet accept
              // this field will silently ignore it.
              max_sessions: config.maxSessions,
              // worker_type lets claude.ai filter environments by origin
              // (e.g. assistant picker only shows assistant-mode workers).
              // Desktop cowork app sends "cowork"; we send a distinct value.
              metadata: { worker_type: config.workerType },
              // Idempotent re-registration: if we have a backend-issued
              // environment_id from a prior session (--session-id resume),
              // send it back so the backend reattaches instead of creating
              // a new env. The backend may still hand back a fresh ID if
              // the old one expired — callers must compare the response.
              ...(config.reuseEnvironmentId && {
                environment_id: config.reuseEnvironmentId,
              }),
            },
            {
              headers: getHeaders(token),
              timeout: 15_000,
              validateStatus: status => status < 500,
            },
          ),
        'Registration',
      )

      handleErrorStatus(response.status, response.data, 'Registration')
      debug(
        `[bridge:api] POST /v1/environments/bridge -> ${response.status} environment_id=${response.data.environment_id}`,
      )
      debug(
        `[bridge:api] >>> ${debugBody({ machine_name: config.machineName, directory: config.dir, branch: config.branch, git_repo_url: config.gitRepoUrl, max_sessions: config.maxSessions, metadata: { worker_type: config.workerType } })}`,
      )
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
      return response.data
    },

    /**
     * Fetch the next queued work item for the environment, or null when the
     * server has nothing. Authenticates with the environment secret, not the
     * user OAuth token (no 401-refresh path here).
     */
    async pollForWork(
      environmentId: string,
      environmentSecret: string,
      signal?: AbortSignal,
      reclaimOlderThanMs?: number,
    ): Promise<WorkResponse | null> {
      validateBridgeId(environmentId, 'environmentId')

      // Save and reset so errors break the "consecutive empty" streak.
      // Restored below when the response is truly empty.
      const prevEmptyPolls = consecutiveEmptyPolls
      consecutiveEmptyPolls = 0

      const response = await axios.get<WorkResponse | null>(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/poll`,
        {
          headers: getHeaders(environmentSecret),
          params:
            reclaimOlderThanMs !== undefined
              ? { reclaim_older_than_ms: reclaimOlderThanMs }
              : undefined,
          timeout: 10_000,
          signal,
          validateStatus: status => status < 500,
        },
      )

      handleErrorStatus(response.status, response.data, 'Poll')

      // Empty body or null = no work available
      if (!response.data) {
        consecutiveEmptyPolls = prevEmptyPolls + 1
        if (
          consecutiveEmptyPolls === 1 ||
          consecutiveEmptyPolls % EMPTY_POLL_LOG_INTERVAL === 0
        ) {
          debug(
            `[bridge:api] GET .../work/poll -> ${response.status} (no work, ${consecutiveEmptyPolls} consecutive empty polls)`,
          )
        }
        return null
      }

      debug(
        `[bridge:api] GET .../work/poll -> ${response.status} workId=${response.data.id} type=${response.data.data?.type}${response.data.data?.id ? ` sessionId=${response.data.data.id}` : ''}`,
      )
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
      return response.data
    },

    /** Acknowledge a claimed work item, authenticated with the session token. */
    async acknowledgeWork(
      environmentId: string,
      workId: string,
      sessionToken: string,
    ): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')

      debug(`[bridge:api] POST .../work/${workId}/ack`)

      const response = await axios.post(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/ack`,
        {},
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )

      handleErrorStatus(response.status, response.data, 'Acknowledge')
      debug(`[bridge:api] POST .../work/${workId}/ack -> ${response.status}`)
    },

    /** Request stop of a work item (optionally forced). OAuth-authenticated. */
    async stopWork(
      environmentId: string,
      workId: string,
      force: boolean,
    ): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')

      debug(`[bridge:api] POST .../work/${workId}/stop force=${force}`)

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/stop`,
            { force },
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'StopWork',
      )

      handleErrorStatus(response.status, response.data, 'StopWork')
      debug(`[bridge:api] POST .../work/${workId}/stop -> ${response.status}`)
    },

    /** Delete this environment's bridge registration. OAuth-authenticated. */
    async deregisterEnvironment(environmentId: string): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')

      debug(`[bridge:api] DELETE /v1/environments/bridge/${environmentId}`)

      const response = await withOAuthRetry(
        (token: string) =>
          axios.delete(
            `${deps.baseUrl}/v1/environments/bridge/${environmentId}`,
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'Deregister',
      )

      handleErrorStatus(response.status, response.data, 'Deregister')
      debug(
        `[bridge:api] DELETE /v1/environments/bridge/${environmentId} -> ${response.status}`,
      )
    },

    /** Archive a session. Idempotent: a 409 (already archived) is not an error. */
    async archiveSession(sessionId: string): Promise<void> {
      validateBridgeId(sessionId, 'sessionId')

      debug(`[bridge:api] POST /v1/sessions/${sessionId}/archive`)

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/sessions/${sessionId}/archive`,
            {},
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'ArchiveSession',
      )

      // 409 = already archived (idempotent, not an error)
      if (response.status === 409) {
        debug(
          `[bridge:api] POST /v1/sessions/${sessionId}/archive -> 409 (already archived)`,
        )
        return
      }

      handleErrorStatus(response.status, response.data, 'ArchiveSession')
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/archive -> ${response.status}`,
      )
    },

    /** POST the bridge reconnect endpoint for an existing session. OAuth-authenticated. */
    async reconnectSession(
      environmentId: string,
      sessionId: string,
    ): Promise<void> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(sessionId, 'sessionId')

      debug(
        `[bridge:api] POST /v1/environments/${environmentId}/bridge/reconnect session_id=${sessionId}`,
      )

      const response = await withOAuthRetry(
        (token: string) =>
          axios.post(
            `${deps.baseUrl}/v1/environments/${environmentId}/bridge/reconnect`,
            { session_id: sessionId },
            {
              headers: getHeaders(token),
              timeout: 10_000,
              validateStatus: s => s < 500,
            },
          ),
        'ReconnectSession',
      )

      handleErrorStatus(response.status, response.data, 'ReconnectSession')
      debug(`[bridge:api] POST .../bridge/reconnect -> ${response.status}`)
    },

    /**
     * Heartbeat a work item to extend its lease. Returns the server's view of
     * the lease (lease_extended) and the work item's state.
     */
    async heartbeatWork(
      environmentId: string,
      workId: string,
      sessionToken: string,
    ): Promise<{ lease_extended: boolean; state: string }> {
      validateBridgeId(environmentId, 'environmentId')
      validateBridgeId(workId, 'workId')

      debug(`[bridge:api] POST .../work/${workId}/heartbeat`)

      const response = await axios.post<{
        lease_extended: boolean
        state: string
        last_heartbeat: string
        ttl_seconds: number
      }>(
        `${deps.baseUrl}/v1/environments/${environmentId}/work/${workId}/heartbeat`,
        {},
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )

      handleErrorStatus(response.status, response.data, 'Heartbeat')
      debug(
        `[bridge:api] POST .../work/${workId}/heartbeat -> ${response.status} lease_extended=${response.data.lease_extended} state=${response.data.state}`,
      )
      return response.data
    },

    /** Post a single permission-response event to the session's events endpoint. */
    async sendPermissionResponseEvent(
      sessionId: string,
      event: PermissionResponseEvent,
      sessionToken: string,
    ): Promise<void> {
      validateBridgeId(sessionId, 'sessionId')

      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/events type=${event.type}`,
      )

      const response = await axios.post(
        `${deps.baseUrl}/v1/sessions/${sessionId}/events`,
        { events: [event] },
        {
          headers: getHeaders(sessionToken),
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )

      handleErrorStatus(
        response.status,
        response.data,
        'SendPermissionResponseEvent',
      )
      debug(
        `[bridge:api] POST /v1/sessions/${sessionId}/events -> ${response.status}`,
      )
      debug(`[bridge:api] >>> ${debugBody({ events: [event] })}`)
      debug(`[bridge:api] <<< ${debugBody(response.data)}`)
    },
  }
}
+
+function handleErrorStatus(
+  status: number,
+  data: unknown,
+  context: string,
+): void {
+  if (status === 200 || status === 204) {
+    return
+  }
+  const detail = extractErrorDetail(data)
+  const errorType = extractErrorTypeFromData(data)
+  switch (status) {
+    case 401:
+      throw new BridgeFatalError(
+        `${context}: Authentication failed (401)${detail ? `: ${detail}` : ''}. ${BRIDGE_LOGIN_INSTRUCTION}`,
+        401,
+        errorType,
+      )
+    case 403:
+      throw new BridgeFatalError(
+        isExpiredErrorType(errorType)
+          ? 'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.'
+          : `${context}: Access denied (403)${detail ? `: ${detail}` : ''}. Check your organization permissions.`,
+        403,
+        errorType,
+      )
+    case 404:
+      throw new BridgeFatalError(
+        detail ??
+          `${context}: Not found (404). Remote Control may not be available for this organization.`,
+        404,
+        errorType,
+      )
+    case 410:
+      throw new BridgeFatalError(
+        detail ??
+          'Remote Control session has expired. Please restart with `claude remote-control` or /remote-control.',
+        410,
+        errorType ?? 'environment_expired',
+      )
+    case 429:
+      throw new Error(`${context}: Rate limited (429). Polling too frequently.`)
+    default:
+      throw new Error(
+        `${context}: Failed with status ${status}${detail ? `: ${detail}` : ''}`,
+      )
+  }
+}
+
+/** Check whether an error type string indicates a session/environment expiry. */
+export function isExpiredErrorType(errorType: string | undefined): boolean {
+  if (!errorType) {
+    return false
+  }
+  return errorType.includes('expired') || errorType.includes('lifetime')
+}
+
+/**
+ * Check whether a BridgeFatalError is a suppressible 403 permission error.
+ * These are 403 errors for scopes like 'external_poll_sessions' or operations
+ * like StopWork that fail because the user's role lacks 'environments:manage'.
+ * They don't affect core functionality and shouldn't be shown to users.
+ */
+export function isSuppressible403(err: BridgeFatalError): boolean {
+  if (err.status !== 403) {
+    return false
+  }
+  return (
+    err.message.includes('external_poll_sessions') ||
+    err.message.includes('environments:manage')
+  )
+}
+
+function extractErrorTypeFromData(data: unknown): string | undefined {
+  if (data && typeof data === 'object') {
+    if (
+      'error' in data &&
+      data.error &&
+      typeof data.error === 'object' &&
+      'type' in data.error &&
+      typeof data.error.type === 'string'
+    ) {
+      return data.error.type
+    }
+  }
+  return undefined
+}

+ 48 - 0
src/bridge/bridgeConfig.ts

@@ -0,0 +1,48 @@
+/**
+ * Shared bridge auth/URL resolution. Consolidates the ant-only
+ * CLAUDE_BRIDGE_* dev overrides that were previously copy-pasted across
+ * a dozen files — inboundAttachments, BriefTool/upload, bridgeMain,
+ * initReplBridge, remoteBridgeCore, daemon workers, /rename,
+ * /remote-control.
+ *
+ * Two layers: *Override() returns the ant-only env var (or undefined);
+ * the non-Override versions fall through to the real OAuth store/config.
+ * Callers that compose with a different auth source (e.g. daemon workers
+ * using IPC auth) use the Override getters directly.
+ */
+
+import { getOauthConfig } from '../constants/oauth.js'
+import { getClaudeAIOAuthTokens } from '../utils/auth.js'
+
+/** Ant-only dev override: CLAUDE_BRIDGE_OAUTH_TOKEN, else undefined. */
+export function getBridgeTokenOverride(): string | undefined {
+  return (
+    (process.env.USER_TYPE === 'ant' &&
+      process.env.CLAUDE_BRIDGE_OAUTH_TOKEN) ||
+    undefined
+  )
+}
+
+/** Ant-only dev override: CLAUDE_BRIDGE_BASE_URL, else undefined. */
+export function getBridgeBaseUrlOverride(): string | undefined {
+  return (
+    (process.env.USER_TYPE === 'ant' && process.env.CLAUDE_BRIDGE_BASE_URL) ||
+    undefined
+  )
+}
+
+/**
+ * Access token for bridge API calls: dev override first, then the OAuth
+ * keychain. Undefined means "not logged in".
+ */
+export function getBridgeAccessToken(): string | undefined {
+  return getBridgeTokenOverride() ?? getClaudeAIOAuthTokens()?.accessToken
+}
+
+/**
+ * Base URL for bridge API calls: dev override first, then the production
+ * OAuth config. Always returns a URL.
+ */
+export function getBridgeBaseUrl(): string {
+  return getBridgeBaseUrlOverride() ?? getOauthConfig().BASE_API_URL
+}

+ 135 - 0
src/bridge/bridgeDebug.ts

@@ -0,0 +1,135 @@
+import { logForDebugging } from '../utils/debug.js'
+import { BridgeFatalError } from './bridgeApi.js'
+import type { BridgeApiClient } from './types.js'
+
+/**
+ * Ant-only fault injection for manually testing bridge recovery paths.
+ *
+ * Real failure modes this targets (BQ 2026-03-12, 7-day window):
+ *   poll 404 not_found_error   — 147K sessions/week, dead onEnvironmentLost gate
+ *   ws_closed 1002/1006        —  22K sessions/week, zombie poll after close
+ *   register transient failure —  residual: network blips during doReconnect
+ *
+ * Usage: /bridge-kick <subcommand> from the REPL while Remote Control is
+ * connected, then tail debug.log to watch the recovery machinery react.
+ *
+ * Module-level state is intentional here: one bridge per REPL process, the
+ * /bridge-kick slash command has no other way to reach into initBridgeCore's
+ * closures, and teardown clears the slot.
+ */
+
/** One-shot fault to inject on the next matching api call. */
type BridgeFault = {
  /** Which BridgeApiClient method the fault targets. */
  method:
    | 'pollForWork'
    | 'registerBridgeEnvironment'
    | 'reconnectSession'
    | 'heartbeatWork'
  /** Fatal errors go through handleErrorStatus → BridgeFatalError. Transient
   *  errors surface as plain axios rejections (5xx / network). Recovery code
   *  distinguishes the two: fatal → teardown, transient → retry/backoff. */
  kind: 'fatal' | 'transient'
  /** HTTP-style status carried on the injected error (fatal) or message (transient). */
  status: number
  /** Server error type to attach to an injected BridgeFatalError. */
  errorType?: string
  /** Remaining injections. Decremented on consume; removed at 0. */
  count: number
}

export type BridgeDebugHandle = {
  /** Invoke the transport's permanent-close handler directly. Tests the
   *  ws_closed → reconnectEnvironmentWithSession escalation (#22148). */
  fireClose: (code: number) => void
  /** Call reconnectEnvironmentWithSession() — same as SIGUSR2 but
   *  reachable from the slash command. */
  forceReconnect: () => void
  /** Queue a fault for the next N calls to the named api method. */
  injectFault: (fault: BridgeFault) => void
  /** Abort the at-capacity sleep so an injected poll fault lands
   *  immediately instead of up to 10min later. */
  wakePollLoop: () => void
  /** env/session IDs for the debug.log grep. */
  describe: () => string
}

// Single-slot handle + pending-fault queue. Module-level on purpose — see the
// file header: one bridge per REPL process, and the slash command needs a way
// into initBridgeCore's closures.
let debugHandle: BridgeDebugHandle | null = null
const faultQueue: BridgeFault[] = []

/** Install the active bridge's debug handle (called when the bridge comes up). */
export function registerBridgeDebugHandle(h: BridgeDebugHandle): void {
  debugHandle = h
}

/** Drop the handle and any queued faults (called on bridge teardown). */
export function clearBridgeDebugHandle(): void {
  debugHandle = null
  faultQueue.length = 0
}

/** Current handle, or null when no bridge is connected. */
export function getBridgeDebugHandle(): BridgeDebugHandle | null {
  return debugHandle
}

/** Queue a fault for wrapApiForFaultInjection to consume, and log it. */
export function injectBridgeFault(fault: BridgeFault): void {
  faultQueue.push(fault)
  logForDebugging(
    `[bridge:debug] Queued fault: ${fault.method} ${fault.kind}/${fault.status}${fault.errorType ? `/${fault.errorType}` : ''} ×${fault.count}`,
  )
}
+
+/**
+ * Wrap a BridgeApiClient so each call first checks the fault queue. If a
+ * matching fault is queued, throw the specified error instead of calling
+ * through. Delegates everything else to the real client.
+ *
+ * Only called when USER_TYPE === 'ant' — zero overhead in external builds.
+ */
+export function wrapApiForFaultInjection(
+  api: BridgeApiClient,
+): BridgeApiClient {
+  function consume(method: BridgeFault['method']): BridgeFault | null {
+    const idx = faultQueue.findIndex(f => f.method === method)
+    if (idx === -1) return null
+    const fault = faultQueue[idx]!
+    fault.count--
+    if (fault.count <= 0) faultQueue.splice(idx, 1)
+    return fault
+  }
+
+  function throwFault(fault: BridgeFault, context: string): never {
+    logForDebugging(
+      `[bridge:debug] Injecting ${fault.kind} fault into ${context}: status=${fault.status} errorType=${fault.errorType ?? 'none'}`,
+    )
+    if (fault.kind === 'fatal') {
+      throw new BridgeFatalError(
+        `[injected] ${context} ${fault.status}`,
+        fault.status,
+        fault.errorType,
+      )
+    }
+    // Transient: mimic an axios rejection (5xx / network). No .status on
+    // the error itself — that's how the catch blocks distinguish.
+    throw new Error(`[injected transient] ${context} ${fault.status}`)
+  }
+
+  return {
+    ...api,
+    async pollForWork(envId, secret, signal, reclaimMs) {
+      const f = consume('pollForWork')
+      if (f) throwFault(f, 'Poll')
+      return api.pollForWork(envId, secret, signal, reclaimMs)
+    },
+    async registerBridgeEnvironment(config) {
+      const f = consume('registerBridgeEnvironment')
+      if (f) throwFault(f, 'Registration')
+      return api.registerBridgeEnvironment(config)
+    },
+    async reconnectSession(envId, sessionId) {
+      const f = consume('reconnectSession')
+      if (f) throwFault(f, 'ReconnectSession')
+      return api.reconnectSession(envId, sessionId)
+    },
+    async heartbeatWork(envId, workId, token) {
+      const f = consume('heartbeatWork')
+      if (f) throwFault(f, 'Heartbeat')
+      return api.heartbeatWork(envId, workId, token)
+    },
+  }
+}

+ 202 - 0
src/bridge/bridgeEnabled.ts

@@ -0,0 +1,202 @@
+import { feature } from 'bun:bundle'
+import {
+  checkGate_CACHED_OR_BLOCKING,
+  getDynamicConfig_CACHED_MAY_BE_STALE,
+  getFeatureValue_CACHED_MAY_BE_STALE,
+} from '../services/analytics/growthbook.js'
+// Namespace import breaks the bridgeEnabled → auth → config → bridgeEnabled
+// cycle — authModule.foo is a live binding, so by the time the helpers below
+// call it, auth.js is fully loaded. Previously used require() for the same
+// deferral, but require() hits a CJS cache that diverges from the ESM
+// namespace after mock.module() (daemon/auth.test.ts), breaking spyOn.
+import * as authModule from '../utils/auth.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
+import { lt } from '../utils/semver.js'
+
/**
 * Runtime check for bridge mode entitlement.
 *
 * Remote Control requires a claude.ai subscription (the bridge auths to CCR
 * with the claude.ai OAuth token). isClaudeAISubscriber() excludes
 * Bedrock/Vertex/Foundry, apiKeyHelper/gateway deployments, env-var API keys,
 * and Console API logins — none of which have the OAuth token CCR needs.
 * See github.com/deshaw/anthropic-issues/issues/24.
 *
 * The `feature('BRIDGE_MODE')` guard ensures the GrowthBook string literal
 * is only referenced when bridge mode is enabled at build time.
 *
 * Non-blocking: reads the cached (possibly stale) GrowthBook value. Use
 * isBridgeEnabledBlocking() below at entitlement gates where a stale `false`
 * would unfairly block access.
 */
export function isBridgeEnabled(): boolean {
  // Positive ternary pattern — see docs/feature-gating.md.
  // Negative pattern (if (!feature(...)) return) does not eliminate
  // inline string literals from external builds.
  return feature('BRIDGE_MODE')
    ? isClaudeAISubscriber() &&
        getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_bridge', false)
    : false
}

/**
 * Blocking entitlement check for Remote Control.
 *
 * Returns cached `true` immediately (fast path). If the disk cache says
 * `false` or is missing, awaits GrowthBook init and fetches the fresh
 * server value (slow path, max ~5s), then writes it to disk.
 *
 * Use at entitlement gates where a stale `false` would unfairly block access.
 * For user-facing error paths, prefer `getBridgeDisabledReason()` which gives
 * a specific diagnostic. For render-body UI visibility checks, use
 * `isBridgeEnabled()` instead.
 */
export async function isBridgeEnabledBlocking(): Promise<boolean> {
  // Same positive-ternary pattern as isBridgeEnabled above.
  return feature('BRIDGE_MODE')
    ? isClaudeAISubscriber() &&
        (await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))
    : false
}

/**
 * Diagnostic message for why Remote Control is unavailable, or null if
 * it's enabled. Call this instead of a bare `isBridgeEnabledBlocking()`
 * check when you need to show the user an actionable error.
 *
 * The GrowthBook gate targets on organizationUUID, which comes from
 * config.oauthAccount — populated by /api/oauth/profile during login.
 * That endpoint requires the user:profile scope. Tokens without it
 * (setup-token, CLAUDE_CODE_OAUTH_TOKEN env var, or pre-scope-expansion
 * logins) leave oauthAccount unpopulated, so the gate falls back to
 * false and users see a dead-end "not enabled" message with no hint
 * that re-login would fix it. See CC-1165 / gh-33105.
 */
export async function getBridgeDisabledReason(): Promise<string | null> {
  if (feature('BRIDGE_MODE')) {
    // Checks are ordered so each one can assume the previous passed, giving
    // the most specific actionable diagnostic first.
    if (!isClaudeAISubscriber()) {
      return 'Remote Control requires a claude.ai subscription. Run `claude auth login` to sign in with your claude.ai account.'
    }
    if (!hasProfileScope()) {
      return 'Remote Control requires a full-scope login token. Long-lived tokens (from `claude setup-token` or CLAUDE_CODE_OAUTH_TOKEN) are limited to inference-only for security reasons. Run `claude auth login` to use Remote Control.'
    }
    if (!getOauthAccountInfo()?.organizationUuid) {
      return 'Unable to determine your organization for Remote Control eligibility. Run `claude auth login` to refresh your account information.'
    }
    if (!(await checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge'))) {
      return 'Remote Control is not yet enabled for your account.'
    }
    return null
  }
  return 'Remote Control is not available in this build.'
}
+
// try/catch: main.tsx:5698 calls isBridgeEnabled() while defining the Commander
// program, before enableConfigs() runs. isClaudeAISubscriber() → getGlobalConfig()
// throws "Config accessed before allowed" there. Pre-config, no OAuth token can
// exist anyway — false is correct. Same swallow getFeatureValue_CACHED_MAY_BE_STALE
// already does at growthbook.ts:775-780.
function isClaudeAISubscriber(): boolean {
  try {
    return authModule.isClaudeAISubscriber()
  } catch {
    return false
  }
}
// Same pre-config swallow as isClaudeAISubscriber above.
function hasProfileScope(): boolean {
  try {
    return authModule.hasProfileScope()
  } catch {
    return false
  }
}
// Same pre-config swallow; undefined = account info unavailable.
function getOauthAccountInfo(): ReturnType<
  typeof authModule.getOauthAccountInfo
> {
  try {
    return authModule.getOauthAccountInfo()
  } catch {
    return undefined
  }
}
+
/**
 * Runtime check for the env-less (v2) REPL bridge path.
 * Returns true when the GrowthBook flag `tengu_bridge_repl_v2` is enabled.
 *
 * This gates which implementation initReplBridge uses — NOT whether bridge
 * is available at all (see isBridgeEnabled above). Daemon/print paths stay
 * on the env-based implementation regardless of this gate.
 */
export function isEnvLessBridgeEnabled(): boolean {
  // Positive ternary pattern — same build-time string-elimination constraint
  // as isBridgeEnabled above.
  return feature('BRIDGE_MODE')
    ? getFeatureValue_CACHED_MAY_BE_STALE('tengu_bridge_repl_v2', false)
    : false
}

/**
 * Kill-switch for the `cse_*` → `session_*` client-side retag shim.
 *
 * The shim exists because compat/convert.go:27 validates TagSession and the
 * claude.ai frontend routes on `session_*`, while v2 worker endpoints hand out
 * `cse_*`. Once the server tags by environment_kind and the frontend accepts
 * `cse_*` directly, flip this to false to make toCompatSessionId a no-op.
 * Defaults to true — the shim stays active until explicitly disabled.
 */
export function isCseShimEnabled(): boolean {
  // Note the non-gated branch also returns true: the shim default applies
  // even in builds without BRIDGE_MODE.
  return feature('BRIDGE_MODE')
    ? getFeatureValue_CACHED_MAY_BE_STALE(
        'tengu_bridge_repl_v2_cse_shim_enabled',
        true,
      )
    : true
}

/**
 * Returns an error message if the current CLI version is below the
 * minimum required for the v1 (env-based) Remote Control path, or null if the
 * version is fine. The v2 (env-less) path uses checkEnvLessBridgeMinVersion()
 * in envLessBridgeConfig.ts instead — the two implementations have independent
 * version floors.
 *
 * Uses cached (non-blocking) GrowthBook config. If GrowthBook hasn't
 * loaded yet, the default '0.0.0' means the check passes — a safe fallback.
 */
export function checkBridgeMinVersion(): string | null {
  // Positive pattern — see docs/feature-gating.md.
  // Negative pattern (if (!feature(...)) return) does not eliminate
  // inline string literals from external builds.
  if (feature('BRIDGE_MODE')) {
    const config = getDynamicConfig_CACHED_MAY_BE_STALE<{
      minVersion: string
    }>('tengu_bridge_min_version', { minVersion: '0.0.0' })
    // NOTE(review): MACRO.VERSION appears to be a build-time version constant
    // injected by the bundler — confirm against build.ts.
    if (config.minVersion && lt(MACRO.VERSION, config.minVersion)) {
      return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${config.minVersion} or higher is required. Run \`claude update\` to update.`
    }
  }
  return null
}

/**
 * Default for remoteControlAtStartup when the user hasn't explicitly set it.
 * When the CCR_AUTO_CONNECT build flag is present (ant-only) and the
 * tengu_cobalt_harbor GrowthBook gate is on, all sessions connect to CCR by
 * default — the user can still opt out by setting remoteControlAtStartup=false
 * in config (explicit settings always win over this default).
 *
 * Defined here rather than in config.ts to avoid a direct
 * config.ts → growthbook.ts import cycle (growthbook.ts → user.ts → config.ts).
 */
export function getCcrAutoConnectDefault(): boolean {
  return feature('CCR_AUTO_CONNECT')
    ? getFeatureValue_CACHED_MAY_BE_STALE('tengu_cobalt_harbor', false)
    : false
}

/**
 * Opt-in CCR mirror mode — every local session spawns an outbound-only
 * Remote Control session that receives forwarded events. Separate from
 * getCcrAutoConnectDefault (bidirectional Remote Control). Env var wins for
 * local opt-in; GrowthBook controls rollout.
 */
export function isCcrMirrorEnabled(): boolean {
  // Env var is checked first so a truthy CLAUDE_CODE_CCR_MIRROR enables the
  // mirror without waiting on a GrowthBook fetch.
  return feature('CCR_MIRROR')
    ? isEnvTruthy(process.env.CLAUDE_CODE_CCR_MIRROR) ||
        getFeatureValue_CACHED_MAY_BE_STALE('tengu_ccr_mirror', false)
    : false
}

+ 2999 - 0
src/bridge/bridgeMain.ts

@@ -0,0 +1,2999 @@
+import { feature } from 'bun:bundle'
+import { randomUUID } from 'crypto'
+import { hostname, tmpdir } from 'os'
+import { basename, join, resolve } from 'path'
+import { getRemoteSessionUrl } from '../constants/product.js'
+import { shutdownDatadog } from '../services/analytics/datadog.js'
+import { shutdown1PEventLogging } from '../services/analytics/firstPartyEventLogger.js'
+import { checkGate_CACHED_OR_BLOCKING } from '../services/analytics/growthbook.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+  logEventAsync,
+} from '../services/analytics/index.js'
+import { isInBundledMode } from '../utils/bundledMode.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
+import { errorMessage } from '../utils/errors.js'
+import { truncateToWidth } from '../utils/format.js'
+import { logError } from '../utils/log.js'
+import { sleep } from '../utils/sleep.js'
+import { createAgentWorktree, removeAgentWorktree } from '../utils/worktree.js'
+import {
+  BridgeFatalError,
+  createBridgeApiClient,
+  isExpiredErrorType,
+  isSuppressible403,
+  validateBridgeId,
+} from './bridgeApi.js'
+import { formatDuration } from './bridgeStatusUtil.js'
+import { createBridgeLogger } from './bridgeUI.js'
+import { createCapacityWake } from './capacityWake.js'
+import { describeAxiosError } from './debugUtils.js'
+import { createTokenRefreshScheduler } from './jwtUtils.js'
+import { getPollIntervalConfig } from './pollConfig.js'
+import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
+import { createSessionSpawner, safeFilenameId } from './sessionRunner.js'
+import { getTrustedDeviceToken } from './trustedDevice.js'
+import {
+  BRIDGE_LOGIN_ERROR,
+  type BridgeApiClient,
+  type BridgeConfig,
+  type BridgeLogger,
+  DEFAULT_SESSION_TIMEOUT_MS,
+  type SessionDoneStatus,
+  type SessionHandle,
+  type SessionSpawner,
+  type SessionSpawnOpts,
+  type SpawnMode,
+} from './types.js'
+import {
+  buildCCRv2SdkUrl,
+  buildSdkUrl,
+  decodeWorkSecret,
+  registerWorker,
+  sameSessionId,
+} from './workSecret.js'
+
/**
 * Tunable retry/backoff timings for the bridge poll loop. "conn" fields
 * govern connection-error backoff, "general" fields govern other errors;
 * the *GiveUpMs values name the point at which sustained failure is treated
 * as fatal (see the give-up handling in runBridgeLoop — confirm exact
 * semantics there).
 */
export type BackoffConfig = {
  // Initial connection-error backoff delay (ms).
  connInitialMs: number
  // Upper cap on connection-error backoff delay (ms).
  connCapMs: number
  // Duration of sustained connection errors before giving up (ms).
  connGiveUpMs: number
  // Initial backoff delay for non-connection errors (ms).
  generalInitialMs: number
  // Upper cap on non-connection error backoff delay (ms).
  generalCapMs: number
  // Duration of sustained general errors before giving up (ms).
  generalGiveUpMs: number
  /** SIGTERM→SIGKILL grace period on shutdown. Default 30s. */
  shutdownGraceMs?: number
  /** stopWorkWithRetry base delay (1s/2s/4s backoff). Default 1000ms. */
  stopWorkBaseDelayMs?: number
}
+
/**
 * Default backoff timings: connection errors back off from 2s up to a 2min
 * cap; other errors from 500ms up to 30s; both categories give up after
 * 10 minutes of sustained failure.
 */
const DEFAULT_BACKOFF: BackoffConfig = {
  connInitialMs: 2_000,
  connCapMs: 120_000, // 2 minutes
  connGiveUpMs: 600_000, // 10 minutes
  generalInitialMs: 500,
  generalCapMs: 30_000,
  generalGiveUpMs: 600_000, // 10 minutes
}

/** Status update interval for the live display (ms). */
const STATUS_UPDATE_INTERVAL_MS = 1_000
// Default session cap for multi-session spawn modes — not referenced in this
// chunk; presumably consumed by the --spawn/--capacity flag handling. TODO
// confirm against the CLI entry point.
const SPAWN_SESSIONS_DEFAULT = 32
+
+/**
+ * GrowthBook gate for multi-session spawn modes (--spawn / --capacity / --create-session-in-dir).
+ * Sibling of tengu_ccr_bridge_multi_environment (multiple envs per host:dir) —
+ * this one enables multiple sessions per environment.
+ * Rollout staged via targeting rules: ants first, then gradual external.
+ *
+ * Uses the blocking gate check so a stale disk-cache miss doesn't unfairly
+ * deny access. The fast path (cache has true) is still instant; only the
+ * cold-start path awaits the server fetch, and that fetch also seeds the
+ * disk cache for next time.
+ */
+async function isMultiSessionSpawnEnabled(): Promise<boolean> {
+  return checkGate_CACHED_OR_BLOCKING('tengu_ccr_bridge_multi_session')
+}
+
+/**
+ * Returns the threshold for detecting system sleep/wake in the poll loop.
+ * Must exceed the max backoff cap — otherwise normal backoff delays trigger
+ * false sleep detection (resetting the error budget indefinitely). Using
+ * 2× the connection backoff cap, matching the pattern in WebSocketTransport
+ * and replBridge.
+ */
+function pollSleepDetectionThresholdMs(backoff: BackoffConfig): number {
+  return backoff.connCapMs * 2
+}
+
+/**
+ * Returns the args that must precede CLI flags when spawning a child claude
+ * process. In compiled binaries, process.execPath is the claude binary itself
+ * and args go directly to it. In npm installs (node running cli.js),
+ * process.execPath is the node runtime — the child spawn must pass the script
+ * path as the first arg, otherwise node interprets --sdk-url as a node option
+ * and exits with "bad option: --sdk-url". See anthropics/claude-code#28334.
+ */
+function spawnScriptArgs(): string[] {
+  if (isInBundledMode() || !process.argv[1]) {
+    return []
+  }
+  return [process.argv[1]]
+}
+
+/** Attempt to spawn a session; returns error string if spawn throws. */
+function safeSpawn(
+  spawner: SessionSpawner,
+  opts: SessionSpawnOpts,
+  dir: string,
+): SessionHandle | string {
+  try {
+    return spawner.spawn(opts, dir)
+  } catch (err) {
+    const errMsg = errorMessage(err)
+    logError(new Error(`Session spawn failed: ${errMsg}`))
+    return errMsg
+  }
+}
+
+export async function runBridgeLoop(
+  config: BridgeConfig,
+  environmentId: string,
+  environmentSecret: string,
+  api: BridgeApiClient,
+  spawner: SessionSpawner,
+  logger: BridgeLogger,
+  signal: AbortSignal,
+  backoffConfig: BackoffConfig = DEFAULT_BACKOFF,
+  initialSessionId?: string,
+  getAccessToken?: () => string | undefined | Promise<string | undefined>,
+): Promise<void> {
+  // Local abort controller so that onSessionDone can stop the poll loop.
+  // Linked to the incoming signal so external aborts also work.
+  const controller = new AbortController()
+  if (signal.aborted) {
+    controller.abort()
+  } else {
+    signal.addEventListener('abort', () => controller.abort(), { once: true })
+  }
+  const loopSignal = controller.signal
+
+  const activeSessions = new Map<string, SessionHandle>()
+  const sessionStartTimes = new Map<string, number>()
+  const sessionWorkIds = new Map<string, string>()
+  // Compat-surface ID (session_*) computed once at spawn and cached so
+  // cleanup and status-update ticks use the same key regardless of whether
+  // the tengu_bridge_repl_v2_cse_shim_enabled gate flips mid-session.
+  const sessionCompatIds = new Map<string, string>()
+  // Session ingress JWTs for heartbeat auth, keyed by sessionId.
+  // Stored separately from handle.accessToken because the token refresh
+  // scheduler overwrites that field with the OAuth token (~3h55m in).
+  const sessionIngressTokens = new Map<string, string>()
+  const sessionTimers = new Map<string, ReturnType<typeof setTimeout>>()
+  const completedWorkIds = new Set<string>()
+  const sessionWorktrees = new Map<
+    string,
+    {
+      worktreePath: string
+      worktreeBranch?: string
+      gitRoot?: string
+      hookBased?: boolean
+    }
+  >()
+  // Track sessions killed by the timeout watchdog so onSessionDone can
+  // distinguish them from server-initiated or shutdown interrupts.
+  const timedOutSessions = new Set<string>()
+  // Sessions that already have a title (server-set or bridge-derived) so
+  // onFirstUserMessage doesn't clobber a user-assigned --name / web rename.
+  // Keyed by compatSessionId to match logger.setSessionTitle's key.
+  const titledSessions = new Set<string>()
+  // Signal to wake the at-capacity sleep early when a session completes,
+  // so the bridge can immediately accept new work.
+  const capacityWake = createCapacityWake(loopSignal)
+
+  /**
+   * Heartbeat all active work items.
+   * Returns 'ok' if at least one heartbeat succeeded, 'auth_failed' if any
+   * got a 401/403 (JWT expired — re-queued via reconnectSession so the next
+   * poll delivers fresh work), or 'failed' if all failed for other reasons.
+   */
+  async function heartbeatActiveWorkItems(): Promise<
+    'ok' | 'auth_failed' | 'fatal' | 'failed'
+  > {
+    let anySuccess = false
+    let anyFatal = false
+    const authFailedSessions: string[] = []
+    for (const [sessionId] of activeSessions) {
+      const workId = sessionWorkIds.get(sessionId)
+      const ingressToken = sessionIngressTokens.get(sessionId)
+      if (!workId || !ingressToken) {
+        continue
+      }
+      try {
+        await api.heartbeatWork(environmentId, workId, ingressToken)
+        anySuccess = true
+      } catch (err) {
+        logForDebugging(
+          `[bridge:heartbeat] Failed for sessionId=${sessionId} workId=${workId}: ${errorMessage(err)}`,
+        )
+        if (err instanceof BridgeFatalError) {
+          logEvent('tengu_bridge_heartbeat_error', {
+            status:
+              err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            error_type: (err.status === 401 || err.status === 403
+              ? 'auth_failed'
+              : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+          })
+          if (err.status === 401 || err.status === 403) {
+            authFailedSessions.push(sessionId)
+          } else {
+            // 404/410 = environment expired or deleted — no point retrying
+            anyFatal = true
+          }
+        }
+      }
+    }
+    // JWT expired → trigger server-side re-dispatch. Without this, work stays
+    // ACK'd out of the Redis PEL and poll returns empty forever (CC-1263).
+    // The existingHandle path below delivers the fresh token to the child.
+    // sessionId is already in the format /bridge/reconnect expects: it comes
+    // from work.data.id, which matches the server's EnvironmentInstance store
+    // (cse_* under the compat gate, session_* otherwise).
+    for (const sessionId of authFailedSessions) {
+      logger.logVerbose(
+        `Session ${sessionId} token expired — re-queuing via bridge/reconnect`,
+      )
+      try {
+        await api.reconnectSession(environmentId, sessionId)
+        logForDebugging(
+          `[bridge:heartbeat] Re-queued sessionId=${sessionId} via bridge/reconnect`,
+        )
+      } catch (err) {
+        logger.logError(
+          `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
+        )
+        logForDebugging(
+          `[bridge:heartbeat] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
+          { level: 'error' },
+        )
+      }
+    }
+    if (anyFatal) {
+      return 'fatal'
+    }
+    if (authFailedSessions.length > 0) {
+      return 'auth_failed'
+    }
+    return anySuccess ? 'ok' : 'failed'
+  }
+
+  // Sessions spawned with CCR v2 env vars. v2 children cannot use OAuth
+  // tokens (CCR worker endpoints validate the JWT's session_id claim,
+  // register_worker.go:32), so onRefresh triggers server re-dispatch
+  // instead — the next poll delivers fresh work with a new JWT via the
+  // existingHandle path below.
+  const v2Sessions = new Set<string>()
+
+  // Proactive token refresh: schedules a timer 5min before the session
+  // ingress JWT expires. v1 delivers OAuth directly; v2 calls
+  // reconnectSession to trigger server re-dispatch (CC-1263: without
+  // this, v2 daemon sessions silently die at ~5h since the server does
+  // not auto-re-dispatch ACK'd work on lease expiry).
+  const tokenRefresh = getAccessToken
+    ? createTokenRefreshScheduler({
+        getAccessToken,
+        onRefresh: (sessionId, oauthToken) => {
+          const handle = activeSessions.get(sessionId)
+          if (!handle) {
+            return
+          }
+          if (v2Sessions.has(sessionId)) {
+            logger.logVerbose(
+              `Refreshing session ${sessionId} token via bridge/reconnect`,
+            )
+            void api
+              .reconnectSession(environmentId, sessionId)
+              .catch((err: unknown) => {
+                logger.logError(
+                  `Failed to refresh session ${sessionId} token: ${errorMessage(err)}`,
+                )
+                logForDebugging(
+                  `[bridge:token] reconnectSession(${sessionId}) failed: ${errorMessage(err)}`,
+                  { level: 'error' },
+                )
+              })
+          } else {
+            handle.updateAccessToken(oauthToken)
+          }
+        },
+        label: 'bridge',
+      })
+    : null
+  const loopStartTime = Date.now()
+  // Track all in-flight cleanup promises (stopWork, worktree removal) so
+  // the shutdown sequence can await them before process.exit().
+  const pendingCleanups = new Set<Promise<unknown>>()
+  function trackCleanup(p: Promise<unknown>): void {
+    pendingCleanups.add(p)
+    void p.finally(() => pendingCleanups.delete(p))
+  }
+  let connBackoff = 0
+  let generalBackoff = 0
+  let connErrorStart: number | null = null
+  let generalErrorStart: number | null = null
+  let lastPollErrorTime: number | null = null
+  let statusUpdateTimer: ReturnType<typeof setInterval> | null = null
+  // Set by BridgeFatalError and give-up paths so the shutdown block can
+  // skip the resume message (resume is impossible after env expiry/auth
+  // failure/sustained connection errors).
+  let fatalExit = false
+
+  logForDebugging(
+    `[bridge:work] Starting poll loop spawnMode=${config.spawnMode} maxSessions=${config.maxSessions} environmentId=${environmentId}`,
+  )
+  logForDiagnosticsNoPII('info', 'bridge_loop_started', {
+    max_sessions: config.maxSessions,
+    spawn_mode: config.spawnMode,
+  })
+
+  // For ant users, show where session debug logs will land so they can tail them.
+  // sessionRunner.ts uses the same base path. File appears once a session spawns.
+  if (process.env.USER_TYPE === 'ant') {
+    let debugGlob: string
+    if (config.debugFile) {
+      const ext = config.debugFile.lastIndexOf('.')
+      debugGlob =
+        ext > 0
+          ? `${config.debugFile.slice(0, ext)}-*${config.debugFile.slice(ext)}`
+          : `${config.debugFile}-*`
+    } else {
+      debugGlob = join(tmpdir(), 'claude', 'bridge-session-*.log')
+    }
+    logger.setDebugLogPath(debugGlob)
+  }
+
+  logger.printBanner(config, environmentId)
+
+  // Seed the logger's session count + spawn mode before any render. Without
+  // this, setAttached() below renders with the logger's default sessionMax=1,
+  // showing "Capacity: 0/1" until the status ticker kicks in (which is gated
+  // by !initialSessionId and only starts after the poll loop picks up work).
+  logger.updateSessionCount(0, config.maxSessions, config.spawnMode)
+
+  // If an initial session was pre-created, show its URL from the start so
+  // the user can click through immediately (matching /remote-control behavior).
+  if (initialSessionId) {
+    logger.setAttached(initialSessionId)
+  }
+
+  /** Refresh the inline status display. Shows idle or active depending on state. */
+  function updateStatusDisplay(): void {
+    // Push the session count (no-op when maxSessions === 1) so the
+    // next renderStatusLine tick shows the current count.
+    logger.updateSessionCount(
+      activeSessions.size,
+      config.maxSessions,
+      config.spawnMode,
+    )
+
+    // Push per-session activity into the multi-session display.
+    for (const [sid, handle] of activeSessions) {
+      const act = handle.currentActivity
+      if (act) {
+        logger.updateSessionActivity(sessionCompatIds.get(sid) ?? sid, act)
+      }
+    }
+
+    if (activeSessions.size === 0) {
+      logger.updateIdleStatus()
+      return
+    }
+
+    // Show the most recently started session that is still actively working.
+    // Sessions whose current activity is 'result' or 'error' are between
+    // turns — the CLI emitted its result but the process stays alive waiting
+    // for the next user message.  Skip updating so the status line keeps
+    // whatever state it had (Attached / session title).
+    const [sessionId, handle] = [...activeSessions.entries()].pop()!
+    const startTime = sessionStartTimes.get(sessionId)
+    if (!startTime) return
+
+    const activity = handle.currentActivity
+    if (!activity || activity.type === 'result' || activity.type === 'error') {
+      // Session is between turns — keep current status (Attached/titled).
+      // In multi-session mode, still refresh so bullet-list activities stay current.
+      if (config.maxSessions > 1) logger.refreshDisplay()
+      return
+    }
+
+    const elapsed = formatDuration(Date.now() - startTime)
+
+    // Build trail from recent tool activities (last 5)
+    const trail = handle.activities
+      .filter(a => a.type === 'tool_start')
+      .slice(-5)
+      .map(a => a.summary)
+
+    logger.updateSessionStatus(sessionId, elapsed, activity, trail)
+  }
+
+  /** Start the status display update ticker. */
+  function startStatusUpdates(): void {
+    stopStatusUpdates()
+    // Call immediately so the first transition (e.g. Connecting → Ready)
+    // happens without delay, avoiding concurrent timer races.
+    updateStatusDisplay()
+    statusUpdateTimer = setInterval(
+      updateStatusDisplay,
+      STATUS_UPDATE_INTERVAL_MS,
+    )
+  }
+
+  /** Stop the status display update ticker. */
+  function stopStatusUpdates(): void {
+    if (statusUpdateTimer) {
+      clearInterval(statusUpdateTimer)
+      statusUpdateTimer = null
+    }
+  }
+
+  function onSessionDone(
+    sessionId: string,
+    startTime: number,
+    handle: SessionHandle,
+  ): (status: SessionDoneStatus) => void {
+    return (rawStatus: SessionDoneStatus): void => {
+      const workId = sessionWorkIds.get(sessionId)
+      activeSessions.delete(sessionId)
+      sessionStartTimes.delete(sessionId)
+      sessionWorkIds.delete(sessionId)
+      sessionIngressTokens.delete(sessionId)
+      const compatId = sessionCompatIds.get(sessionId) ?? sessionId
+      sessionCompatIds.delete(sessionId)
+      logger.removeSession(compatId)
+      titledSessions.delete(compatId)
+      v2Sessions.delete(sessionId)
+      // Clear per-session timeout timer
+      const timer = sessionTimers.get(sessionId)
+      if (timer) {
+        clearTimeout(timer)
+        sessionTimers.delete(sessionId)
+      }
+      // Clear token refresh timer
+      tokenRefresh?.cancel(sessionId)
+      // Wake the at-capacity sleep so the bridge can accept new work immediately
+      capacityWake.wake()
+
+      // If the session was killed by the timeout watchdog, treat it as a
+      // failed session (not a server/shutdown interrupt) so we still call
+      // stopWork and archiveSession below.
+      const wasTimedOut = timedOutSessions.delete(sessionId)
+      const status: SessionDoneStatus =
+        wasTimedOut && rawStatus === 'interrupted' ? 'failed' : rawStatus
+      const durationMs = Date.now() - startTime
+
+      logForDebugging(
+        `[bridge:session] sessionId=${sessionId} workId=${workId ?? 'unknown'} exited status=${status} duration=${formatDuration(durationMs)}`,
+      )
+      logEvent('tengu_bridge_session_done', {
+        status:
+          status as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        duration_ms: durationMs,
+      })
+      logForDiagnosticsNoPII('info', 'bridge_session_done', {
+        status,
+        duration_ms: durationMs,
+      })
+
+      // Clear the status display before printing final log
+      logger.clearStatus()
+      stopStatusUpdates()
+
+      // Build error message from stderr if available
+      const stderrSummary =
+        handle.lastStderr.length > 0 ? handle.lastStderr.join('\n') : undefined
+      let failureMessage: string | undefined
+
+      switch (status) {
+        case 'completed':
+          logger.logSessionComplete(sessionId, durationMs)
+          break
+        case 'failed':
+          // Skip failure log during shutdown — the child exits non-zero when
+          // killed, which is expected and not a real failure.
+          // Also skip for timeout-killed sessions — the timeout watchdog
+          // already logged a clear timeout message.
+          if (!wasTimedOut && !loopSignal.aborted) {
+            failureMessage = stderrSummary ?? 'Process exited with error'
+            logger.logSessionFailed(sessionId, failureMessage)
+            logError(new Error(`Bridge session failed: ${failureMessage}`))
+          }
+          break
+        case 'interrupted':
+          logger.logVerbose(`Session ${sessionId} interrupted`)
+          break
+      }
+
+      // Notify the server that this work item is done. Skip for interrupted
+      // sessions — interrupts are either server-initiated (the server already
+      // knows) or caused by bridge shutdown (which calls stopWork() separately).
+      if (status !== 'interrupted' && workId) {
+        trackCleanup(
+          stopWorkWithRetry(
+            api,
+            environmentId,
+            workId,
+            logger,
+            backoffConfig.stopWorkBaseDelayMs,
+          ),
+        )
+        completedWorkIds.add(workId)
+      }
+
+      // Clean up worktree if one was created for this session
+      const wt = sessionWorktrees.get(sessionId)
+      if (wt) {
+        sessionWorktrees.delete(sessionId)
+        trackCleanup(
+          removeAgentWorktree(
+            wt.worktreePath,
+            wt.worktreeBranch,
+            wt.gitRoot,
+            wt.hookBased,
+          ).catch((err: unknown) =>
+            logger.logVerbose(
+              `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
+            ),
+          ),
+        )
+      }
+
+      // Lifecycle decision: in multi-session mode, keep the bridge running
+      // after a session completes. In single-session mode, abort the poll
+      // loop so the bridge exits cleanly.
+      if (status !== 'interrupted' && !loopSignal.aborted) {
+        if (config.spawnMode !== 'single-session') {
+          // Multi-session: archive the completed session so it doesn't linger
+          // as stale in the web UI. archiveSession is idempotent (409 if already
+          // archived), so double-archiving at shutdown is safe.
+          // sessionId arrived as cse_* from the work poll (infrastructure-layer
+          // tag). archiveSession hits /v1/sessions/{id}/archive which is the
+          // compat surface and validates TagSession (session_*). Re-tag — same
+          // UUID underneath.
+          trackCleanup(
+            api
+              .archiveSession(compatId)
+              .catch((err: unknown) =>
+                logger.logVerbose(
+                  `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
+                ),
+              ),
+          )
+          logForDebugging(
+            `[bridge:session] Session ${status}, returning to idle (multi-session mode)`,
+          )
+        } else {
+          // Single-session: coupled lifecycle — tear down environment
+          logForDebugging(
+            `[bridge:session] Session ${status}, aborting poll loop to tear down environment`,
+          )
+          controller.abort()
+          return
+        }
+      }
+
+      if (!loopSignal.aborted) {
+        startStatusUpdates()
+      }
+    }
+  }
+
+  // Start the idle status display immediately — unless we have a pre-created
+  // session, in which case setAttached() already set up the display and the
+  // poll loop will start status updates when it picks up the session.
+  if (!initialSessionId) {
+    startStatusUpdates()
+  }
+
+  while (!loopSignal.aborted) {
+    // Fetched once per iteration — the GrowthBook cache refreshes every
+    // 5 min, so a loop running at the at-capacity rate picks up config
+    // changes within one sleep cycle.
+    const pollConfig = getPollIntervalConfig()
+
+    try {
+      const work = await api.pollForWork(
+        environmentId,
+        environmentSecret,
+        loopSignal,
+        pollConfig.reclaim_older_than_ms,
+      )
+
+      // Log reconnection if we were previously disconnected
+      const wasDisconnected =
+        connErrorStart !== null || generalErrorStart !== null
+      if (wasDisconnected) {
+        const disconnectedMs =
+          Date.now() - (connErrorStart ?? generalErrorStart ?? Date.now())
+        logger.logReconnected(disconnectedMs)
+        logForDebugging(
+          `[bridge:poll] Reconnected after ${formatDuration(disconnectedMs)}`,
+        )
+        logEvent('tengu_bridge_reconnected', {
+          disconnected_ms: disconnectedMs,
+        })
+      }
+
+      connBackoff = 0
+      generalBackoff = 0
+      connErrorStart = null
+      generalErrorStart = null
+      lastPollErrorTime = null
+
+      // Null response = no work available in the queue.
+      // Add a minimum delay to avoid hammering the server.
+      if (!work) {
+        // Use live check (not a snapshot) since sessions can end during poll.
+        const atCap = activeSessions.size >= config.maxSessions
+        if (atCap) {
+          const atCapMs = pollConfig.multisession_poll_interval_ms_at_capacity
+          // Heartbeat loops WITHOUT polling. When at-capacity polling is also
+          // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
+          // to poll at that interval — heartbeat and poll compose instead of
+          // one suppressing the other. We break out to poll when:
+          //   - Poll deadline reached (atCapMs > 0 only)
+          //   - Auth fails (JWT expired → poll refreshes tokens)
+          //   - Capacity wake fires (session ended → poll for new work)
+          //   - Loop aborted (shutdown)
+          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+            logEvent('tengu_bridge_heartbeat_mode_entered', {
+              active_sessions: activeSessions.size,
+              heartbeat_interval_ms:
+                pollConfig.non_exclusive_heartbeat_interval_ms,
+            })
+            // Deadline computed once at entry — GB updates to atCapMs don't
+            // shift an in-flight deadline (next entry picks up the new value).
+            const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
+            let hbResult: 'ok' | 'auth_failed' | 'fatal' | 'failed' = 'ok'
+            let hbCycles = 0
+            while (
+              !loopSignal.aborted &&
+              activeSessions.size >= config.maxSessions &&
+              (pollDeadline === null || Date.now() < pollDeadline)
+            ) {
+              // Re-read config each cycle so GrowthBook updates take effect
+              const hbConfig = getPollIntervalConfig()
+              if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
+
+              // Capture capacity signal BEFORE the async heartbeat call so
+              // a session ending during the HTTP request is caught by the
+              // subsequent sleep (instead of being lost to a replaced controller).
+              const cap = capacityWake.signal()
+
+              hbResult = await heartbeatActiveWorkItems()
+              if (hbResult === 'auth_failed' || hbResult === 'fatal') {
+                cap.cleanup()
+                break
+              }
+
+              hbCycles++
+              await sleep(
+                hbConfig.non_exclusive_heartbeat_interval_ms,
+                cap.signal,
+              )
+              cap.cleanup()
+            }
+
+            // Determine exit reason for telemetry
+            const exitReason =
+              hbResult === 'auth_failed' || hbResult === 'fatal'
+                ? hbResult
+                : loopSignal.aborted
+                  ? 'shutdown'
+                  : activeSessions.size < config.maxSessions
+                    ? 'capacity_changed'
+                    : pollDeadline !== null && Date.now() >= pollDeadline
+                      ? 'poll_due'
+                      : 'config_disabled'
+            logEvent('tengu_bridge_heartbeat_mode_exited', {
+              reason:
+                exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              heartbeat_cycles: hbCycles,
+              active_sessions: activeSessions.size,
+            })
+            if (exitReason === 'poll_due') {
+              // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
+              // so the once-per-10min poll_due poll is invisible at counter=2.
+              // Log it here so verification runs see both endpoints in the debug log.
+              logForDebugging(
+                `[bridge:poll] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
+              )
+            }
+
+            // On auth_failed or fatal, sleep before polling to avoid a tight
+            // poll+heartbeat loop. Auth_failed: heartbeatActiveWorkItems
+            // already called reconnectSession — the sleep gives the server
+            // time to propagate the re-queue. Fatal (404/410): may be a
+            // single work item GCd while the environment is still valid.
+            // Use atCapMs if enabled, else the heartbeat interval as a floor
+            // (guaranteed > 0 here) so heartbeat-only configs don't tight-loop.
+            if (hbResult === 'auth_failed' || hbResult === 'fatal') {
+              const cap = capacityWake.signal()
+              await sleep(
+                atCapMs > 0
+                  ? atCapMs
+                  : pollConfig.non_exclusive_heartbeat_interval_ms,
+                cap.signal,
+              )
+              cap.cleanup()
+            }
+          } else if (atCapMs > 0) {
+            // Heartbeat disabled: slow poll as liveness signal.
+            const cap = capacityWake.signal()
+            await sleep(atCapMs, cap.signal)
+            cap.cleanup()
+          }
+        } else {
+          const interval =
+            activeSessions.size > 0
+              ? pollConfig.multisession_poll_interval_ms_partial_capacity
+              : pollConfig.multisession_poll_interval_ms_not_at_capacity
+          await sleep(interval, loopSignal)
+        }
+        continue
+      }
+
+      // At capacity — we polled to keep the heartbeat alive, but cannot
+      // accept new work right now. We still enter the switch below so that
+      // token refreshes for existing sessions are processed (the case
+      // 'session' handler checks for existing sessions before the inner
+      // capacity guard).
+      const atCapacityBeforeSwitch = activeSessions.size >= config.maxSessions
+
+      // Skip work items that have already been completed and stopped.
+      // The server may re-deliver stale work before processing our stop
+      // request, which would otherwise cause a duplicate session spawn.
+      if (completedWorkIds.has(work.id)) {
+        logForDebugging(
+          `[bridge:work] Skipping already-completed workId=${work.id}`,
+        )
+        // Respect capacity throttle — without a sleep here, persistent stale
+        // redeliveries would tight-loop at poll-request speed (the !work
+        // branch above is the only sleep, and work != null skips it).
+        if (atCapacityBeforeSwitch) {
+          const cap = capacityWake.signal()
+          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+            await heartbeatActiveWorkItems()
+            await sleep(
+              pollConfig.non_exclusive_heartbeat_interval_ms,
+              cap.signal,
+            )
+          } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
+            await sleep(
+              pollConfig.multisession_poll_interval_ms_at_capacity,
+              cap.signal,
+            )
+          }
+          cap.cleanup()
+        } else {
+          await sleep(1000, loopSignal)
+        }
+        continue
+      }
+
+      // Decode the work secret for session spawning and to extract the JWT
+      // used for the ack call below.
+      let secret
+      try {
+        secret = decodeWorkSecret(work.secret)
+      } catch (err) {
+        const errMsg = errorMessage(err)
+        logger.logError(
+          `Failed to decode work secret for workId=${work.id}: ${errMsg}`,
+        )
+        logEvent('tengu_bridge_work_secret_failed', {})
+        // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth,
+        // so it's callable here — prevents XAUTOCLAIM from re-delivering this
+        // poisoned item every reclaim_older_than_ms cycle.
+        completedWorkIds.add(work.id)
+        trackCleanup(
+          stopWorkWithRetry(
+            api,
+            environmentId,
+            work.id,
+            logger,
+            backoffConfig.stopWorkBaseDelayMs,
+          ),
+        )
+        // Respect capacity throttle before retrying — without a sleep here,
+        // repeated decode failures at capacity would tight-loop at
+        // poll-request speed (work != null skips the !work sleep above).
+        if (atCapacityBeforeSwitch) {
+          const cap = capacityWake.signal()
+          if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+            await heartbeatActiveWorkItems()
+            await sleep(
+              pollConfig.non_exclusive_heartbeat_interval_ms,
+              cap.signal,
+            )
+          } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
+            await sleep(
+              pollConfig.multisession_poll_interval_ms_at_capacity,
+              cap.signal,
+            )
+          }
+          cap.cleanup()
+        }
+        continue
+      }
+
+      // Explicitly acknowledge after committing to handle the work — NOT
+      // before. The at-capacity guard inside case 'session' can break
+      // without spawning; acking there would permanently lose the work.
+      // Ack failures are non-fatal: server re-delivers, and existingHandle
+      // / completedWorkIds paths handle the dedup.
+      const ackWork = async (): Promise<void> => {
+        logForDebugging(`[bridge:work] Acknowledging workId=${work.id}`)
+        try {
+          await api.acknowledgeWork(
+            environmentId,
+            work.id,
+            secret.session_ingress_token,
+          )
+        } catch (err) {
+          logForDebugging(
+            `[bridge:work] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
+          )
+        }
+      }
+
+      const workType: string = work.data.type
+      switch (work.data.type) {
+        case 'healthcheck':
+          await ackWork()
+          logForDebugging('[bridge:work] Healthcheck received')
+          logger.logVerbose('Healthcheck received')
+          break
+        case 'session': {
+          const sessionId = work.data.id
+          try {
+            validateBridgeId(sessionId, 'session_id')
+          } catch {
+            await ackWork()
+            logger.logError(`Invalid session_id received: ${sessionId}`)
+            break
+          }
+
+          // If the session is already running, deliver the fresh token so
+          // the child process can reconnect its WebSocket with the new
+          // session ingress token. This handles the case where the server
+          // re-dispatches work for an existing session after the WS drops.
+          const existingHandle = activeSessions.get(sessionId)
+          if (existingHandle) {
+            existingHandle.updateAccessToken(secret.session_ingress_token)
+            sessionIngressTokens.set(sessionId, secret.session_ingress_token)
+            sessionWorkIds.set(sessionId, work.id)
+            // Re-schedule next refresh from the fresh JWT's expiry. onRefresh
+            // branches on v2Sessions so both v1 and v2 are safe here.
+            tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
+            logForDebugging(
+              `[bridge:work] Updated access token for existing sessionId=${sessionId} workId=${work.id}`,
+            )
+            await ackWork()
+            break
+          }
+
+          // At capacity — token refresh for existing sessions is handled
+          // above, but we cannot spawn new ones. The post-switch capacity
+          // sleep will throttle the loop; just break here.
+          if (activeSessions.size >= config.maxSessions) {
+            logForDebugging(
+              `[bridge:work] At capacity (${activeSessions.size}/${config.maxSessions}), cannot spawn new session for workId=${work.id}`,
+            )
+            break
+          }
+
+          await ackWork()
+          const spawnStartTime = Date.now()
+
+          // CCR v2 path: register this bridge as the session worker, get the
+          // epoch, and point the child at /v1/code/sessions/{id}. The child
+          // already has the full v2 client (SSETransport + CCRClient) — same
+          // code path environment-manager launches in containers.
+          //
+          // v1 path: Session-Ingress WebSocket. Uses config.sessionIngressUrl
+          // (not secret.api_base_url, which may point to a remote proxy tunnel
+          // that doesn't know about locally-created sessions).
+          let sdkUrl: string
+          let useCcrV2 = false
+          let workerEpoch: number | undefined
+          // Server decides per-session via the work secret; env var is the
+          // ant-dev override (e.g. forcing v2 before the server flag is on).
+          if (
+            secret.use_code_sessions === true ||
+            isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
+          ) {
+            sdkUrl = buildCCRv2SdkUrl(config.apiBaseUrl, sessionId)
+            // Retry once on transient failure (network blip, 500) before
+            // permanently giving up and killing the session.
+            for (let attempt = 1; attempt <= 2; attempt++) {
+              try {
+                workerEpoch = await registerWorker(
+                  sdkUrl,
+                  secret.session_ingress_token,
+                )
+                useCcrV2 = true
+                logForDebugging(
+                  `[bridge:session] CCR v2: registered worker sessionId=${sessionId} epoch=${workerEpoch} attempt=${attempt}`,
+                )
+                break
+              } catch (err) {
+                const errMsg = errorMessage(err)
+                if (attempt < 2) {
+                  logForDebugging(
+                    `[bridge:session] CCR v2: registerWorker attempt ${attempt} failed, retrying: ${errMsg}`,
+                  )
+                  await sleep(2_000, loopSignal)
+                  if (loopSignal.aborted) break
+                  continue
+                }
+                logger.logError(
+                  `CCR v2 worker registration failed for session ${sessionId}: ${errMsg}`,
+                )
+                logError(new Error(`registerWorker failed: ${errMsg}`))
+                completedWorkIds.add(work.id)
+                trackCleanup(
+                  stopWorkWithRetry(
+                    api,
+                    environmentId,
+                    work.id,
+                    logger,
+                    backoffConfig.stopWorkBaseDelayMs,
+                  ),
+                )
+              }
+            }
+            if (!useCcrV2) break
+          } else {
+            sdkUrl = buildSdkUrl(config.sessionIngressUrl, sessionId)
+          }
+
+          // In worktree mode, on-demand sessions get an isolated git worktree
+          // so concurrent sessions don't interfere with each other's file
+          // changes. The pre-created initial session (if any) runs in
+          // config.dir so the user's first session lands in the directory they
+          // invoked `rc` from — matching the old single-session UX.
+          // In same-dir and single-session modes, all sessions share config.dir.
+          // Capture spawnMode before the await below — the `w` key handler
+          // mutates config.spawnMode directly, and createAgentWorktree can
+          // take 1-2s, so reading config.spawnMode after the await can
+          // produce contradictory analytics (spawn_mode:'same-dir', in_worktree:true).
+          const spawnModeAtDecision = config.spawnMode
+          let sessionDir = config.dir
+          let worktreeCreateMs = 0
+          if (
+            spawnModeAtDecision === 'worktree' &&
+            (initialSessionId === undefined ||
+              !sameSessionId(sessionId, initialSessionId))
+          ) {
+            const wtStart = Date.now()
+            try {
+              const wt = await createAgentWorktree(
+                `bridge-${safeFilenameId(sessionId)}`,
+              )
+              worktreeCreateMs = Date.now() - wtStart
+              sessionWorktrees.set(sessionId, {
+                worktreePath: wt.worktreePath,
+                worktreeBranch: wt.worktreeBranch,
+                gitRoot: wt.gitRoot,
+                hookBased: wt.hookBased,
+              })
+              sessionDir = wt.worktreePath
+              logForDebugging(
+                `[bridge:session] Created worktree for sessionId=${sessionId} at ${wt.worktreePath}`,
+              )
+            } catch (err) {
+              const errMsg = errorMessage(err)
+              logger.logError(
+                `Failed to create worktree for session ${sessionId}: ${errMsg}`,
+              )
+              logError(new Error(`Worktree creation failed: ${errMsg}`))
+              completedWorkIds.add(work.id)
+              trackCleanup(
+                stopWorkWithRetry(
+                  api,
+                  environmentId,
+                  work.id,
+                  logger,
+                  backoffConfig.stopWorkBaseDelayMs,
+                ),
+              )
+              break
+            }
+          }
+
+          logForDebugging(
+            `[bridge:session] Spawning sessionId=${sessionId} sdkUrl=${sdkUrl}`,
+          )
+
+          // compat-surface session_* form for logger/Sessions-API calls.
+          // Work poll returns cse_* under v2 compat; convert before spawn so
+          // the onFirstUserMessage callback can close over it.
+          const compatSessionId = toCompatSessionId(sessionId)
+
+          const spawnResult = safeSpawn(
+            spawner,
+            {
+              sessionId,
+              sdkUrl,
+              accessToken: secret.session_ingress_token,
+              useCcrV2,
+              workerEpoch,
+              onFirstUserMessage: text => {
+                // Server-set titles (--name, web rename) win. fetchSessionTitle
+                // runs concurrently; if it already populated titledSessions,
+                // skip. If it hasn't resolved yet, the derived title sticks —
+                // acceptable since the server had no title at spawn time.
+                if (titledSessions.has(compatSessionId)) return
+                titledSessions.add(compatSessionId)
+                const title = deriveSessionTitle(text)
+                logger.setSessionTitle(compatSessionId, title)
+                logForDebugging(
+                  `[bridge:title] derived title for ${compatSessionId}: ${title}`,
+                )
+                void import('./createSession.js')
+                  .then(({ updateBridgeSessionTitle }) =>
+                    updateBridgeSessionTitle(compatSessionId, title, {
+                      baseUrl: config.apiBaseUrl,
+                    }),
+                  )
+                  .catch(err =>
+                    logForDebugging(
+                      `[bridge:title] failed to update title for ${compatSessionId}: ${err}`,
+                      { level: 'error' },
+                    ),
+                  )
+              },
+            },
+            sessionDir,
+          )
+          if (typeof spawnResult === 'string') {
+            logger.logError(
+              `Failed to spawn session ${sessionId}: ${spawnResult}`,
+            )
+            // Clean up worktree if one was created for this session
+            const wt = sessionWorktrees.get(sessionId)
+            if (wt) {
+              sessionWorktrees.delete(sessionId)
+              trackCleanup(
+                removeAgentWorktree(
+                  wt.worktreePath,
+                  wt.worktreeBranch,
+                  wt.gitRoot,
+                  wt.hookBased,
+                ).catch((err: unknown) =>
+                  logger.logVerbose(
+                    `Failed to remove worktree ${wt.worktreePath}: ${errorMessage(err)}`,
+                  ),
+                ),
+              )
+            }
+            completedWorkIds.add(work.id)
+            trackCleanup(
+              stopWorkWithRetry(
+                api,
+                environmentId,
+                work.id,
+                logger,
+                backoffConfig.stopWorkBaseDelayMs,
+              ),
+            )
+            break
+          }
+          const handle = spawnResult
+
+          const spawnDurationMs = Date.now() - spawnStartTime
+          logEvent('tengu_bridge_session_started', {
+            active_sessions: activeSessions.size,
+            spawn_mode:
+              spawnModeAtDecision as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            in_worktree: sessionWorktrees.has(sessionId),
+            spawn_duration_ms: spawnDurationMs,
+            worktree_create_ms: worktreeCreateMs,
+            inProtectedNamespace: isInProtectedNamespace(),
+          })
+          logForDiagnosticsNoPII('info', 'bridge_session_started', {
+            spawn_mode: spawnModeAtDecision,
+            in_worktree: sessionWorktrees.has(sessionId),
+            spawn_duration_ms: spawnDurationMs,
+            worktree_create_ms: worktreeCreateMs,
+          })
+
+          activeSessions.set(sessionId, handle)
+          sessionWorkIds.set(sessionId, work.id)
+          sessionIngressTokens.set(sessionId, secret.session_ingress_token)
+          sessionCompatIds.set(sessionId, compatSessionId)
+
+          const startTime = Date.now()
+          sessionStartTimes.set(sessionId, startTime)
+
+          // Use a generic prompt description since we no longer get startup_context
+          logger.logSessionStart(sessionId, `Session ${sessionId}`)
+
+          // Compute the actual debug file path (mirrors sessionRunner.ts logic)
+          const safeId = safeFilenameId(sessionId)
+          let sessionDebugFile: string | undefined
+          if (config.debugFile) {
+            const ext = config.debugFile.lastIndexOf('.')
+            if (ext > 0) {
+              sessionDebugFile = `${config.debugFile.slice(0, ext)}-${safeId}${config.debugFile.slice(ext)}`
+            } else {
+              sessionDebugFile = `${config.debugFile}-${safeId}`
+            }
+          } else if (config.verbose || process.env.USER_TYPE === 'ant') {
+            sessionDebugFile = join(
+              tmpdir(),
+              'claude',
+              `bridge-session-${safeId}.log`,
+            )
+          }
+
+          if (sessionDebugFile) {
+            logger.logVerbose(`Debug log: ${sessionDebugFile}`)
+          }
+
+          // Register in the sessions Map before starting status updates so the
+          // first render tick shows the correct count and bullet list in sync.
+          logger.addSession(
+            compatSessionId,
+            getRemoteSessionUrl(compatSessionId, config.sessionIngressUrl),
+          )
+
+          // Start live status updates and transition to "Attached" state.
+          startStatusUpdates()
+          logger.setAttached(compatSessionId)
+
+          // One-shot title fetch. If the session already has a title (set via
+          // --name, web rename, or /remote-control), display it and mark as
+          // titled so the first-user-message fallback doesn't overwrite it.
+          // Otherwise onFirstUserMessage derives one from the first prompt.
+          void fetchSessionTitle(compatSessionId, config.apiBaseUrl)
+            .then(title => {
+              if (title && activeSessions.has(sessionId)) {
+                titledSessions.add(compatSessionId)
+                logger.setSessionTitle(compatSessionId, title)
+                logForDebugging(
+                  `[bridge:title] server title for ${compatSessionId}: ${title}`,
+                )
+              }
+            })
+            .catch(err =>
+              logForDebugging(
+                `[bridge:title] failed to fetch title for ${compatSessionId}: ${err}`,
+                { level: 'error' },
+              ),
+            )
+
+          // Start per-session timeout watchdog
+          const timeoutMs =
+            config.sessionTimeoutMs ?? DEFAULT_SESSION_TIMEOUT_MS
+          if (timeoutMs > 0) {
+            const timer = setTimeout(
+              onSessionTimeout,
+              timeoutMs,
+              sessionId,
+              timeoutMs,
+              logger,
+              timedOutSessions,
+              handle,
+            )
+            sessionTimers.set(sessionId, timer)
+          }
+
+          // Schedule proactive token refresh before the JWT expires.
+          // onRefresh branches on v2Sessions: v1 delivers OAuth to the
+          // child, v2 triggers server re-dispatch via reconnectSession.
+          if (useCcrV2) {
+            v2Sessions.add(sessionId)
+          }
+          tokenRefresh?.schedule(sessionId, secret.session_ingress_token)
+
+          void handle.done.then(onSessionDone(sessionId, startTime, handle))
+          break
+        }
+        default:
+          await ackWork()
+          // Gracefully ignore unknown work types. The backend may send new
+          // types before the bridge client is updated.
+          logForDebugging(
+            `[bridge:work] Unknown work type: ${workType}, skipping`,
+          )
+          break
+      }
+
+      // When at capacity, throttle the loop. The switch above still runs so
+      // existing-session token refreshes are processed, but we sleep here
+      // to avoid busy-looping. Include the capacity wake signal so the
+      // sleep is interrupted immediately when a session completes.
+      if (atCapacityBeforeSwitch) {
+        const cap = capacityWake.signal()
+        if (pollConfig.non_exclusive_heartbeat_interval_ms > 0) {
+          await heartbeatActiveWorkItems()
+          await sleep(
+            pollConfig.non_exclusive_heartbeat_interval_ms,
+            cap.signal,
+          )
+        } else if (pollConfig.multisession_poll_interval_ms_at_capacity > 0) {
+          await sleep(
+            pollConfig.multisession_poll_interval_ms_at_capacity,
+            cap.signal,
+          )
+        }
+        cap.cleanup()
+      }
+    } catch (err) {
+      if (loopSignal.aborted) {
+        break
+      }
+
+      // Fatal errors (401/403) — no point retrying, auth won't fix itself
+      if (err instanceof BridgeFatalError) {
+        fatalExit = true
+        // Server-enforced expiry gets a clean status message, not an error
+        if (isExpiredErrorType(err.errorType)) {
+          logger.logStatus(err.message)
+        } else if (isSuppressible403(err)) {
+          // Cosmetic 403 errors (e.g., external_poll_sessions scope,
+          // environments:manage permission) — don't show to user
+          logForDebugging(`[bridge:work] Suppressed 403 error: ${err.message}`)
+        } else {
+          logger.logError(err.message)
+          logError(err)
+        }
+        logEvent('tengu_bridge_fatal_error', {
+          status: err.status,
+          error_type:
+            err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        })
+        logForDiagnosticsNoPII(
+          isExpiredErrorType(err.errorType) ? 'info' : 'error',
+          'bridge_fatal_error',
+          { status: err.status, error_type: err.errorType },
+        )
+        break
+      }
+
+      const errMsg = describeAxiosError(err)
+
+      if (isConnectionError(err) || isServerError(err)) {
+        const now = Date.now()
+
+        // Detect system sleep/wake: if the gap since the last poll error
+        // greatly exceeds the expected backoff, the machine likely slept.
+        // Reset error tracking so the bridge retries with a fresh budget.
+        if (
+          lastPollErrorTime !== null &&
+          now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
+        ) {
+          logForDebugging(
+            `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
+          )
+          logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
+            gapMs: now - lastPollErrorTime,
+          })
+          connErrorStart = null
+          connBackoff = 0
+          generalErrorStart = null
+          generalBackoff = 0
+        }
+        lastPollErrorTime = now
+
+        if (!connErrorStart) {
+          connErrorStart = now
+        }
+        const elapsed = now - connErrorStart
+        if (elapsed >= backoffConfig.connGiveUpMs) {
+          logger.logError(
+            `Server unreachable for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
+          )
+          logEvent('tengu_bridge_poll_give_up', {
+            error_type:
+              'connection' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            elapsed_ms: elapsed,
+          })
+          logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
+            error_type: 'connection',
+            elapsed_ms: elapsed,
+          })
+          fatalExit = true
+          break
+        }
+
+        // Reset the other track when switching error types
+        generalErrorStart = null
+        generalBackoff = 0
+
+        connBackoff = connBackoff
+          ? Math.min(connBackoff * 2, backoffConfig.connCapMs)
+          : backoffConfig.connInitialMs
+        const delay = addJitter(connBackoff)
+        logger.logVerbose(
+          `Connection error, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
+        )
+        logger.updateReconnectingStatus(
+          formatDelay(delay),
+          formatDuration(elapsed),
+        )
+        // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
+        // this backoff path. Heartbeat before each sleep so /poll outages
+        // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced
+        // to avoid) don't kill the 300s lease TTL. No-op when activeSessions
+        // is empty or heartbeat is disabled.
+        if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
+          await heartbeatActiveWorkItems()
+        }
+        await sleep(delay, loopSignal)
+      } else {
+        const now = Date.now()
+
+        // Sleep detection for general errors (same logic as connection errors)
+        if (
+          lastPollErrorTime !== null &&
+          now - lastPollErrorTime > pollSleepDetectionThresholdMs(backoffConfig)
+        ) {
+          logForDebugging(
+            `[bridge:work] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting error budget`,
+          )
+          logForDiagnosticsNoPII('info', 'bridge_poll_sleep_detected', {
+            gapMs: now - lastPollErrorTime,
+          })
+          connErrorStart = null
+          connBackoff = 0
+          generalErrorStart = null
+          generalBackoff = 0
+        }
+        lastPollErrorTime = now
+
+        if (!generalErrorStart) {
+          generalErrorStart = now
+        }
+        const elapsed = now - generalErrorStart
+        if (elapsed >= backoffConfig.generalGiveUpMs) {
+          logger.logError(
+            `Persistent errors for ${Math.round(elapsed / 60_000)} minutes, giving up.`,
+          )
+          logEvent('tengu_bridge_poll_give_up', {
+            error_type:
+              'general' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            elapsed_ms: elapsed,
+          })
+          logForDiagnosticsNoPII('error', 'bridge_poll_give_up', {
+            error_type: 'general',
+            elapsed_ms: elapsed,
+          })
+          fatalExit = true
+          break
+        }
+
+        // Reset the other track when switching error types
+        connErrorStart = null
+        connBackoff = 0
+
+        generalBackoff = generalBackoff
+          ? Math.min(generalBackoff * 2, backoffConfig.generalCapMs)
+          : backoffConfig.generalInitialMs
+        const delay = addJitter(generalBackoff)
+        logger.logVerbose(
+          `Poll failed, retrying in ${formatDelay(delay)} (${Math.round(elapsed / 1000)}s elapsed): ${errMsg}`,
+        )
+        logger.updateReconnectingStatus(
+          formatDelay(delay),
+          formatDuration(elapsed),
+        )
+        if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
+          await heartbeatActiveWorkItems()
+        }
+        await sleep(delay, loopSignal)
+      }
+    }
+  }
+
+  // Clean up
+  stopStatusUpdates()
+  logger.clearStatus()
+
+  const loopDurationMs = Date.now() - loopStartTime
+  logEvent('tengu_bridge_shutdown', {
+    active_sessions: activeSessions.size,
+    loop_duration_ms: loopDurationMs,
+  })
+  logForDiagnosticsNoPII('info', 'bridge_shutdown', {
+    active_sessions: activeSessions.size,
+    loop_duration_ms: loopDurationMs,
+  })
+
+  // Graceful shutdown: kill active sessions, report them as interrupted,
+  // archive sessions, then deregister the environment so the web UI shows
+  // the bridge as offline.
+
+  // Collect all session IDs to archive on exit. This includes:
+  // 1. Active sessions (snapshot before killing — onSessionDone clears maps)
+  // 2. The initial auto-created session (may never have had work dispatched)
+  // api.archiveSession is idempotent (409 if already archived), so
+  // double-archiving is safe.
+  const sessionsToArchive = new Set(activeSessions.keys())
+  if (initialSessionId) {
+    sessionsToArchive.add(initialSessionId)
+  }
+  // Snapshot before killing — onSessionDone clears sessionCompatIds.
+  const compatIdSnapshot = new Map(sessionCompatIds)
+
+  if (activeSessions.size > 0) {
+    logForDebugging(
+      `[bridge:shutdown] Shutting down ${activeSessions.size} active session(s)`,
+    )
+    logger.logStatus(
+      `Shutting down ${activeSessions.size} active session(s)\u2026`,
+    )
+
+    // Snapshot work IDs before killing — onSessionDone clears the maps when
+    // each child exits, so we need a copy for the stopWork calls below.
+    const shutdownWorkIds = new Map(sessionWorkIds)
+
+    for (const [sessionId, handle] of activeSessions.entries()) {
+      logForDebugging(
+        `[bridge:shutdown] Sending SIGTERM to sessionId=${sessionId}`,
+      )
+      handle.kill()
+    }
+
+    const timeout = new AbortController()
+    await Promise.race([
+      Promise.allSettled([...activeSessions.values()].map(h => h.done)),
+      sleep(backoffConfig.shutdownGraceMs ?? 30_000, timeout.signal),
+    ])
+    timeout.abort()
+
+    // SIGKILL any processes that didn't respond to SIGTERM within the grace window
+    for (const [sid, handle] of activeSessions.entries()) {
+      logForDebugging(`[bridge:shutdown] Force-killing stuck sessionId=${sid}`)
+      handle.forceKill()
+    }
+
+    // Clear any remaining session timeout and refresh timers
+    for (const timer of sessionTimers.values()) {
+      clearTimeout(timer)
+    }
+    sessionTimers.clear()
+    tokenRefresh?.cancelAll()
+
+    // Clean up any remaining worktrees from active sessions.
+    // Snapshot and clear the map first so onSessionDone (which may fire
+    // during the await below when handle.done resolves) won't try to
+    // remove the same worktrees again.
+    if (sessionWorktrees.size > 0) {
+      const remainingWorktrees = [...sessionWorktrees.values()]
+      sessionWorktrees.clear()
+      logForDebugging(
+        `[bridge:shutdown] Cleaning up ${remainingWorktrees.length} worktree(s)`,
+      )
+      await Promise.allSettled(
+        remainingWorktrees.map(wt =>
+          removeAgentWorktree(
+            wt.worktreePath,
+            wt.worktreeBranch,
+            wt.gitRoot,
+            wt.hookBased,
+          ),
+        ),
+      )
+    }
+
+    // Stop all active work items so the server knows they're done
+    await Promise.allSettled(
+      [...shutdownWorkIds.entries()].map(([sessionId, workId]) => {
+        return api
+          .stopWork(environmentId, workId, true)
+          .catch(err =>
+            logger.logVerbose(
+              `Failed to stop work ${workId} for session ${sessionId}: ${errorMessage(err)}`,
+            ),
+          )
+      }),
+    )
+  }
+
+  // Ensure all in-flight cleanup (stopWork, worktree removal) from
+  // onSessionDone completes before deregistering — otherwise
+  // process.exit() can kill them mid-flight.
+  if (pendingCleanups.size > 0) {
+    await Promise.allSettled([...pendingCleanups])
+  }
+
+  // In single-session mode with a known session, leave the session and
+  // environment alive so `claude remote-control --session-id=<id>` can resume.
+  // The backend GCs stale environments via a 4h TTL (BRIDGE_LAST_POLL_TTL).
+  // Archiving the session or deregistering the environment would make the
+  // printed resume command a lie — deregister deletes Firestore + Redis stream.
+  // Skip when the loop exited fatally (env expired, auth failed, give-up) —
+  // resume is impossible in those cases and the message would contradict the
+  // error already printed.
+  // feature('KAIROS') gate: --session-id is ant-only; without the gate,
+  // revert to the pre-PR behavior (archive + deregister on every shutdown).
+  if (
+    feature('KAIROS') &&
+    config.spawnMode === 'single-session' &&
+    initialSessionId &&
+    !fatalExit
+  ) {
+    logger.logStatus(
+      `Resume this session by running \`claude remote-control --continue\``,
+    )
+    logForDebugging(
+      `[bridge:shutdown] Skipping archive+deregister to allow resume of session ${initialSessionId}`,
+    )
+    return
+  }
+
+  // Archive all known sessions so they don't linger as idle/running on the
+  // server after the bridge goes offline.
+  if (sessionsToArchive.size > 0) {
+    logForDebugging(
+      `[bridge:shutdown] Archiving ${sessionsToArchive.size} session(s)`,
+    )
+    await Promise.allSettled(
+      [...sessionsToArchive].map(sessionId =>
+        api
+          .archiveSession(
+            compatIdSnapshot.get(sessionId) ?? toCompatSessionId(sessionId),
+          )
+          .catch(err =>
+            logger.logVerbose(
+              `Failed to archive session ${sessionId}: ${errorMessage(err)}`,
+            ),
+          ),
+      ),
+    )
+  }
+
+  // Deregister the environment so the web UI shows the bridge as offline
+  // and the Redis stream is cleaned up.
+  try {
+    await api.deregisterEnvironment(environmentId)
+    logForDebugging(
+      `[bridge:shutdown] Environment deregistered, bridge offline`,
+    )
+    logger.logVerbose('Environment deregistered.')
+  } catch (err) {
+    logger.logVerbose(`Failed to deregister environment: ${errorMessage(err)}`)
+  }
+
+  // Clear the crash-recovery pointer — the env is gone, pointer would be
+  // stale. The early return above (resumable SIGINT shutdown) skips this,
+  // leaving the pointer as a backup for the printed --session-id hint.
+  const { clearBridgePointer } = await import('./bridgePointer.js')
+  await clearBridgePointer(config.dir)
+
+  logger.logVerbose('Environment offline.')
+}
+
+const CONNECTION_ERROR_CODES = new Set([
+  'ECONNREFUSED',
+  'ECONNRESET',
+  'ETIMEDOUT',
+  'ENETUNREACH',
+  'EHOSTUNREACH',
+])
+
+export function isConnectionError(err: unknown): boolean {
+  if (
+    err &&
+    typeof err === 'object' &&
+    'code' in err &&
+    typeof err.code === 'string' &&
+    CONNECTION_ERROR_CODES.has(err.code)
+  ) {
+    return true
+  }
+  return false
+}
+
+/** Detect HTTP 5xx errors from axios (code: 'ERR_BAD_RESPONSE'). */
+export function isServerError(err: unknown): boolean {
+  return (
+    !!err &&
+    typeof err === 'object' &&
+    'code' in err &&
+    typeof err.code === 'string' &&
+    err.code === 'ERR_BAD_RESPONSE'
+  )
+}
+
+/** Add ±25% jitter to a delay value. */
+function addJitter(ms: number): number {
+  return Math.max(0, ms + ms * 0.25 * (2 * Math.random() - 1))
+}
+
+function formatDelay(ms: number): string {
+  return ms >= 1000 ? `${(ms / 1000).toFixed(1)}s` : `${Math.round(ms)}ms`
+}
+
+/**
+ * Retry stopWork with exponential backoff (3 attempts, 1s/2s/4s).
+ * Ensures the server learns the work item ended, preventing server-side zombies.
+ */
+async function stopWorkWithRetry(
+  api: BridgeApiClient,
+  environmentId: string,
+  workId: string,
+  logger: BridgeLogger,
+  baseDelayMs = 1000,
+): Promise<void> {
+  const MAX_ATTEMPTS = 3
+
+  for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+    try {
+      await api.stopWork(environmentId, workId, false)
+      logForDebugging(
+        `[bridge:work] stopWork succeeded for workId=${workId} on attempt ${attempt}/${MAX_ATTEMPTS}`,
+      )
+      return
+    } catch (err) {
+      // Auth/permission errors won't be fixed by retrying
+      if (err instanceof BridgeFatalError) {
+        if (isSuppressible403(err)) {
+          logForDebugging(
+            `[bridge:work] Suppressed stopWork 403 for ${workId}: ${err.message}`,
+          )
+        } else {
+          logger.logError(`Failed to stop work ${workId}: ${err.message}`)
+        }
+        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
+          attempts: attempt,
+          fatal: true,
+        })
+        return
+      }
+      const errMsg = errorMessage(err)
+      if (attempt < MAX_ATTEMPTS) {
+        const delay = addJitter(baseDelayMs * Math.pow(2, attempt - 1))
+        logger.logVerbose(
+          `Failed to stop work ${workId} (attempt ${attempt}/${MAX_ATTEMPTS}), retrying in ${formatDelay(delay)}: ${errMsg}`,
+        )
+        await sleep(delay)
+      } else {
+        logger.logError(
+          `Failed to stop work ${workId} after ${MAX_ATTEMPTS} attempts: ${errMsg}`,
+        )
+        logForDiagnosticsNoPII('error', 'bridge_stop_work_failed', {
+          attempts: MAX_ATTEMPTS,
+        })
+      }
+    }
+  }
+}
+
+function onSessionTimeout(
+  sessionId: string,
+  timeoutMs: number,
+  logger: BridgeLogger,
+  timedOutSessions: Set<string>,
+  handle: SessionHandle,
+): void {
+  logForDebugging(
+    `[bridge:session] sessionId=${sessionId} timed out after ${formatDuration(timeoutMs)}`,
+  )
+  logEvent('tengu_bridge_session_timeout', {
+    timeout_ms: timeoutMs,
+  })
+  logger.logSessionFailed(
+    sessionId,
+    `Session timed out after ${formatDuration(timeoutMs)}`,
+  )
+  timedOutSessions.add(sessionId)
+  handle.kill()
+}
+
/** Result of parsing `claude remote-control` CLI arguments (see parseArgs). */
export type ParsedArgs = {
  /** --verbose / -v: enable verbose output. */
  verbose: boolean
  /** --sandbox / --no-sandbox (last flag wins). */
  sandbox: boolean
  /** --debug-file: resolved absolute path for debug log output. */
  debugFile?: string
  /** --session-timeout: given in seconds on the CLI, stored as milliseconds. */
  sessionTimeoutMs?: number
  /** --permission-mode: validated later against PERMISSION_MODES in bridgeMain. */
  permissionMode?: string
  /** --name: session name shown in claude.ai/code. */
  name?: string
  /** Value passed to --spawn (if any); undefined if no --spawn flag was given. */
  spawnMode: SpawnMode | undefined
  /** Value passed to --capacity (if any); undefined if no --capacity flag was given. */
  capacity: number | undefined
  /** --[no-]create-session-in-dir override; undefined = use default (on). */
  createSessionInDir: boolean | undefined
  /** Resume an existing session instead of creating a new one. */
  sessionId?: string
  /** Resume the last session in this directory (reads bridge-pointer.json). */
  continueSession: boolean
  /** --help / -h: print usage and exit. */
  help: boolean
  /** Set when parsing failed; other fields hold whatever was parsed so far. */
  error?: string
}
+
+const SPAWN_FLAG_VALUES = ['session', 'same-dir', 'worktree'] as const
+
+function parseSpawnValue(raw: string | undefined): SpawnMode | string {
+  if (raw === 'session') return 'single-session'
+  if (raw === 'same-dir') return 'same-dir'
+  if (raw === 'worktree') return 'worktree'
+  return `--spawn requires one of: ${SPAWN_FLAG_VALUES.join(', ')} (got: ${raw ?? '<missing>'})`
+}
+
+function parseCapacityValue(raw: string | undefined): number | string {
+  const n = raw === undefined ? NaN : parseInt(raw, 10)
+  if (isNaN(n) || n < 1) {
+    return `--capacity requires a positive integer (got: ${raw ?? '<missing>'})`
+  }
+  return n
+}
+
+export function parseArgs(args: string[]): ParsedArgs {
+  let verbose = false
+  let sandbox = false
+  let debugFile: string | undefined
+  let sessionTimeoutMs: number | undefined
+  let permissionMode: string | undefined
+  let name: string | undefined
+  let help = false
+  let spawnMode: SpawnMode | undefined
+  let capacity: number | undefined
+  let createSessionInDir: boolean | undefined
+  let sessionId: string | undefined
+  let continueSession = false
+
+  for (let i = 0; i < args.length; i++) {
+    const arg = args[i]!
+    if (arg === '--help' || arg === '-h') {
+      help = true
+    } else if (arg === '--verbose' || arg === '-v') {
+      verbose = true
+    } else if (arg === '--sandbox') {
+      sandbox = true
+    } else if (arg === '--no-sandbox') {
+      sandbox = false
+    } else if (arg === '--debug-file' && i + 1 < args.length) {
+      debugFile = resolve(args[++i]!)
+    } else if (arg.startsWith('--debug-file=')) {
+      debugFile = resolve(arg.slice('--debug-file='.length))
+    } else if (arg === '--session-timeout' && i + 1 < args.length) {
+      sessionTimeoutMs = parseInt(args[++i]!, 10) * 1000
+    } else if (arg.startsWith('--session-timeout=')) {
+      sessionTimeoutMs =
+        parseInt(arg.slice('--session-timeout='.length), 10) * 1000
+    } else if (arg === '--permission-mode' && i + 1 < args.length) {
+      permissionMode = args[++i]!
+    } else if (arg.startsWith('--permission-mode=')) {
+      permissionMode = arg.slice('--permission-mode='.length)
+    } else if (arg === '--name' && i + 1 < args.length) {
+      name = args[++i]!
+    } else if (arg.startsWith('--name=')) {
+      name = arg.slice('--name='.length)
+    } else if (
+      feature('KAIROS') &&
+      arg === '--session-id' &&
+      i + 1 < args.length
+    ) {
+      sessionId = args[++i]!
+      if (!sessionId) {
+        return makeError('--session-id requires a value')
+      }
+    } else if (feature('KAIROS') && arg.startsWith('--session-id=')) {
+      sessionId = arg.slice('--session-id='.length)
+      if (!sessionId) {
+        return makeError('--session-id requires a value')
+      }
+    } else if (feature('KAIROS') && (arg === '--continue' || arg === '-c')) {
+      continueSession = true
+    } else if (arg === '--spawn' || arg.startsWith('--spawn=')) {
+      if (spawnMode !== undefined) {
+        return makeError('--spawn may only be specified once')
+      }
+      const raw = arg.startsWith('--spawn=')
+        ? arg.slice('--spawn='.length)
+        : args[++i]
+      const v = parseSpawnValue(raw)
+      if (v === 'single-session' || v === 'same-dir' || v === 'worktree') {
+        spawnMode = v
+      } else {
+        return makeError(v)
+      }
+    } else if (arg === '--capacity' || arg.startsWith('--capacity=')) {
+      if (capacity !== undefined) {
+        return makeError('--capacity may only be specified once')
+      }
+      const raw = arg.startsWith('--capacity=')
+        ? arg.slice('--capacity='.length)
+        : args[++i]
+      const v = parseCapacityValue(raw)
+      if (typeof v === 'number') capacity = v
+      else return makeError(v)
+    } else if (arg === '--create-session-in-dir') {
+      createSessionInDir = true
+    } else if (arg === '--no-create-session-in-dir') {
+      createSessionInDir = false
+    } else {
+      return makeError(
+        `Unknown argument: ${arg}\nRun 'claude remote-control --help' for usage.`,
+      )
+    }
+  }
+
+  // Note: gate check for --spawn/--capacity/--create-session-in-dir is in bridgeMain
+  // (gate-aware error). Flag cross-validation happens here.
+
+  // --capacity only makes sense for multi-session modes.
+  if (spawnMode === 'single-session' && capacity !== undefined) {
+    return makeError(
+      `--capacity cannot be used with --spawn=session (single-session mode has fixed capacity 1).`,
+    )
+  }
+
+  // --session-id / --continue resume a specific session on its original
+  // environment; incompatible with spawn-related flags (which configure
+  // fresh session creation), and mutually exclusive with each other.
+  if (
+    (sessionId || continueSession) &&
+    (spawnMode !== undefined ||
+      capacity !== undefined ||
+      createSessionInDir !== undefined)
+  ) {
+    return makeError(
+      `--session-id and --continue cannot be used with --spawn, --capacity, or --create-session-in-dir.`,
+    )
+  }
+  if (sessionId && continueSession) {
+    return makeError(`--session-id and --continue cannot be used together.`)
+  }
+
+  return {
+    verbose,
+    sandbox,
+    debugFile,
+    sessionTimeoutMs,
+    permissionMode,
+    name,
+    spawnMode,
+    capacity,
+    createSessionInDir,
+    sessionId,
+    continueSession,
+    help,
+  }
+
+  function makeError(error: string): ParsedArgs {
+    return {
+      verbose,
+      sandbox,
+      debugFile,
+      sessionTimeoutMs,
+      permissionMode,
+      name,
+      spawnMode,
+      capacity,
+      createSessionInDir,
+      sessionId,
+      continueSession,
+      help,
+      error,
+    }
+  }
+}
+
/**
 * Print CLI usage for `claude remote-control` to stdout.
 *
 * The help text is assembled dynamically: permission modes come from
 * EXTERNAL_PERMISSION_MODES, the multi-session/server sections render only
 * when isMultiSessionSpawnEnabled() is true, and the --session-id/--continue
 * lines are gated behind feature('KAIROS').
 */
async function printHelp(): Promise<void> {
  // Use EXTERNAL_PERMISSION_MODES for help text — internal modes (bubble)
  // are ant-only and auto is feature-gated; they're still accepted by validation.
  const { EXTERNAL_PERMISSION_MODES } = await import('../types/permissions.js')
  const modes = EXTERNAL_PERMISSION_MODES.join(', ')
  // Multi-session gate: controls whether the server-mode flags and notes appear.
  const showServer = await isMultiSessionSpawnEnabled()
  const serverOptions = showServer
    ? `  --spawn <mode>                   Spawn mode: same-dir, worktree, session
                                   (default: same-dir)
  --capacity <N>                   Max concurrent sessions in worktree or
                                   same-dir mode (default: ${SPAWN_SESSIONS_DEFAULT})
  --[no-]create-session-in-dir     Pre-create a session in the current
                                   directory; in worktree mode this session
                                   stays in cwd while on-demand sessions get
                                   isolated worktrees (default: on)
`
    : ''
  const serverDescription = showServer
    ? `
  Remote Control runs as a persistent server that accepts multiple concurrent
  sessions in the current directory. One session is pre-created on start so
  you have somewhere to type immediately. Use --spawn=worktree to isolate
  each on-demand session in its own git worktree, or --spawn=session for
  the classic single-session mode (exits when that session ends). Press 'w'
  during runtime to toggle between same-dir and worktree.
`
    : ''
  const serverNote = showServer
    ? `  - Worktree mode requires a git repository or WorktreeCreate/WorktreeRemove hooks
`
    : ''
  // The KAIROS-gated lines splice into OPTIONS between --name and --permission-mode.
  const help = `
Remote Control - Connect your local environment to claude.ai/code

USAGE
  claude remote-control [options]
OPTIONS
  --name <name>                    Name for the session (shown in claude.ai/code)
${
  feature('KAIROS')
    ? `  -c, --continue                   Resume the last session in this directory
  --session-id <id>                Resume a specific session by ID (cannot be
                                   used with spawn flags or --continue)
`
    : ''
}  --permission-mode <mode>         Permission mode for spawned sessions
                                   (${modes})
  --debug-file <path>              Write debug logs to file
  -v, --verbose                    Enable verbose output
  -h, --help                       Show this help
${serverOptions}
DESCRIPTION
  Remote Control allows you to control sessions on your local device from
  claude.ai/code (https://claude.ai/code). Run this command in the
  directory you want to work in, then connect from the Claude app or web.
${serverDescription}
NOTES
  - You must be logged in with a Claude account that has a subscription
  - Run \`claude\` first in the directory to accept the workspace trust dialog
${serverNote}`
  // biome-ignore lint/suspicious/noConsole: intentional help output
  console.log(help)
}
+
+const TITLE_MAX_LEN = 80
+
+/** Derive a session title from a user message: first line, truncated. */
+function deriveSessionTitle(text: string): string {
+  // Collapse whitespace — newlines/tabs would break the single-line status display.
+  const flat = text.replace(/\s+/g, ' ').trim()
+  return truncateToWidth(flat, TITLE_MAX_LEN)
+}
+
+/**
+ * One-shot fetch of a session's title via GET /v1/sessions/{id}.
+ *
+ * Uses `getBridgeSession` from createSession.ts (ccr-byoc headers + org UUID)
+ * rather than the environments-level bridgeApi client, whose headers make the
+ * Sessions API return 404. Returns undefined if the session has no title yet
+ * or the fetch fails — the caller falls back to deriving a title from the
+ * first user message.
+ */
+async function fetchSessionTitle(
+  compatSessionId: string,
+  baseUrl: string,
+): Promise<string | undefined> {
+  const { getBridgeSession } = await import('./createSession.js')
+  const session = await getBridgeSession(compatSessionId, { baseUrl })
+  return session?.title || undefined
+}
+
+export async function bridgeMain(args: string[]): Promise<void> {
+  const parsed = parseArgs(args)
+
+  if (parsed.help) {
+    await printHelp()
+    return
+  }
+  if (parsed.error) {
+    // biome-ignore lint/suspicious/noConsole: intentional error output
+    console.error(`Error: ${parsed.error}`)
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  const {
+    verbose,
+    sandbox,
+    debugFile,
+    sessionTimeoutMs,
+    permissionMode,
+    name,
+    spawnMode: parsedSpawnMode,
+    capacity: parsedCapacity,
+    createSessionInDir: parsedCreateSessionInDir,
+    sessionId: parsedSessionId,
+    continueSession,
+  } = parsed
+  // Mutable so --continue can set it from the pointer file. The #20460
+  // resume flow below then treats it the same as an explicit --session-id.
+  let resumeSessionId = parsedSessionId
+  // When --continue found a pointer, this is the directory it came from
+  // (may be a worktree sibling, not `dir`). On resume-flow deterministic
+  // failure, clear THIS file so --continue doesn't keep hitting the same
+  // dead session. Undefined for explicit --session-id (leaves pointer alone).
+  let resumePointerDir: string | undefined
+
+  const usedMultiSessionFeature =
+    parsedSpawnMode !== undefined ||
+    parsedCapacity !== undefined ||
+    parsedCreateSessionInDir !== undefined
+
+  // Validate permission mode early so the user gets an error before
+  // the bridge starts polling for work.
+  if (permissionMode !== undefined) {
+    const { PERMISSION_MODES } = await import('../types/permissions.js')
+    const valid: readonly string[] = PERMISSION_MODES
+    if (!valid.includes(permissionMode)) {
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Invalid permission mode '${permissionMode}'. Valid modes: ${valid.join(', ')}`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+  }
+
+  const dir = resolve('.')
+
+  // The bridge fast-path bypasses init.ts, so we must enable config reading
+  // before any code that transitively calls getGlobalConfig()
+  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
+    '../utils/config.js'
+  )
+  enableConfigs()
+
+  // Initialize analytics and error reporting sinks. The bridge bypasses the
+  // setup() init flow, so we call initSinks() directly to attach sinks here.
+  const { initSinks } = await import('../utils/sinks.js')
+  initSinks()
+
+  // Gate-aware validation: --spawn / --capacity / --create-session-in-dir require
+  // the multi-session gate. parseArgs has already validated flag combinations;
+  // here we only check the gate since that requires an async GrowthBook call.
+  // Runs after enableConfigs() (GrowthBook cache reads global config) and after
+  // initSinks() so the denial event can be enqueued.
+  const multiSessionEnabled = await isMultiSessionSpawnEnabled()
+  if (usedMultiSessionFeature && !multiSessionEnabled) {
+    await logEventAsync('tengu_bridge_multi_session_denied', {
+      used_spawn: parsedSpawnMode !== undefined,
+      used_capacity: parsedCapacity !== undefined,
+      used_create_session_in_dir: parsedCreateSessionInDir !== undefined,
+    })
+    // logEventAsync only enqueues — process.exit() discards buffered events.
+    // Flush explicitly, capped at 500ms to match gracefulShutdown.ts.
+    // (sleep() doesn't unref its timer, but process.exit() follows immediately
+    // so the ref'd timer can't delay shutdown.)
+    await Promise.race([
+      Promise.all([shutdown1PEventLogging(), shutdownDatadog()]),
+      sleep(500, undefined, { unref: true }),
+    ]).catch(() => {})
+    // biome-ignore lint/suspicious/noConsole: intentional error output
+    console.error(
+      'Error: Multi-session Remote Control is not enabled for your account yet.',
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Set the bootstrap CWD so that trust checks, project config lookups, and
+  // git utilities (getBranch, getRemoteUrl) resolve against the correct path.
+  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
+  setOriginalCwd(dir)
+  setCwdState(dir)
+
+  // The bridge bypasses main.tsx (which renders the interactive TrustDialog via showSetupScreens),
+  // so we must verify trust was previously established by a normal `claude` session.
+  if (!checkHasTrustDialogAccepted()) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(
+      `Error: Workspace not trusted. Please run \`claude\` in ${dir} first to review and accept the workspace trust dialog.`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Resolve auth
+  const { clearOAuthTokenCache, checkAndRefreshOAuthTokenIfNeeded } =
+    await import('../utils/auth.js')
+  const { getBridgeAccessToken, getBridgeBaseUrl } = await import(
+    './bridgeConfig.js'
+  )
+
+  const bridgeToken = getBridgeAccessToken()
+  if (!bridgeToken) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(BRIDGE_LOGIN_ERROR)
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // First-time remote dialog — explain what bridge does and get consent
+  const {
+    getGlobalConfig,
+    saveGlobalConfig,
+    getCurrentProjectConfig,
+    saveCurrentProjectConfig,
+  } = await import('../utils/config.js')
+  if (!getGlobalConfig().remoteDialogSeen) {
+    const readline = await import('readline')
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    })
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(
+      '\nRemote Control lets you access this CLI session from the web (claude.ai/code)\nor the Claude app, so you can pick up where you left off on any device.\n\nYou can disconnect remote access anytime by running /remote-control again.\n',
+    )
+    const answer = await new Promise<string>(resolve => {
+      rl.question('Enable Remote Control? (y/n) ', resolve)
+    })
+    rl.close()
+    saveGlobalConfig(current => {
+      if (current.remoteDialogSeen) return current
+      return { ...current, remoteDialogSeen: true }
+    })
+    if (answer.toLowerCase() !== 'y' && answer.toLowerCase() !== 'yes') {
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(0)
+    }
+  }
+
+  // --continue: resolve the most recent session from the crash-recovery
+  // pointer and chain into the #20460 --session-id flow. Worktree-aware:
+  // checks current dir first (fast path, zero exec), then fans out to git
+  // worktree siblings if that misses — the REPL bridge writes to
+  // getOriginalCwd() which EnterWorktreeTool/activeWorktreeSession can
+  // point at a worktree while the user's shell is at the repo root.
+  // KAIROS-gated at parseArgs — continueSession is always false in external
+  // builds, so this block tree-shakes.
+  if (feature('KAIROS') && continueSession) {
+    const { readBridgePointerAcrossWorktrees } = await import(
+      './bridgePointer.js'
+    )
+    const found = await readBridgePointerAcrossWorktrees(dir)
+    if (!found) {
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: No recent session found in this directory or its worktrees. Run \`claude remote-control\` to start a new one.`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    const { pointer, dir: pointerDir } = found
+    const ageMin = Math.round(pointer.ageMs / 60_000)
+    const ageStr = ageMin < 60 ? `${ageMin}m` : `${Math.round(ageMin / 60)}h`
+    const fromWt = pointerDir !== dir ? ` from worktree ${pointerDir}` : ''
+    // biome-ignore lint/suspicious/noConsole: intentional info output
+    console.error(
+      `Resuming session ${pointer.sessionId} (${ageStr} ago)${fromWt}\u2026`,
+    )
+    resumeSessionId = pointer.sessionId
+    // Track where the pointer came from so the #20460 exit(1) paths below
+    // clear the RIGHT file on deterministic failure — otherwise --continue
+    // would keep hitting the same dead session. May be a worktree sibling.
+    resumePointerDir = pointerDir
+  }
+
+  // In production, baseUrl is the Anthropic API (from OAuth config).
+  // CLAUDE_BRIDGE_BASE_URL overrides this for ant local dev only.
+  const baseUrl = getBridgeBaseUrl()
+
+  // For non-localhost targets, require HTTPS to protect credentials.
+  if (
+    baseUrl.startsWith('http://') &&
+    !baseUrl.includes('localhost') &&
+    !baseUrl.includes('127.0.0.1')
+  ) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(
+      'Error: Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Session ingress URL for WebSocket connections. In production this is the
+  // same as baseUrl (Envoy routes /v1/session_ingress/* to session-ingress).
+  // Locally, session-ingress runs on a different port (9413) than the
+  // contain-provide-api (8211), so CLAUDE_BRIDGE_SESSION_INGRESS_URL must be
+  // set explicitly. Ant-only, matching CLAUDE_BRIDGE_BASE_URL.
+  const sessionIngressUrl =
+    process.env.USER_TYPE === 'ant' &&
+    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      : baseUrl
+
+  const { getBranch, getRemoteUrl, findGitRoot } = await import(
+    '../utils/git.js'
+  )
+
+  // Precheck worktree availability for the first-run dialog and the `w`
+  // toggle. Unconditional so we know upfront whether worktree is an option.
+  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
+  const worktreeAvailable = hasWorktreeCreateHook() || findGitRoot(dir) !== null
+
+  // Load saved per-project spawn-mode preference. Gated by multiSessionEnabled
+  // so a GrowthBook rollback cleanly reverts users to single-session —
+  // otherwise a saved pref would silently re-enable multi-session behavior
+  // (worktree isolation, 32 max sessions, w toggle) despite the gate being off.
+  // Also guard against a stale worktree pref left over from when this dir WAS
+  // a git repo (or the user copied config) — clear it on disk so the warning
+  // doesn't repeat on every launch.
+  let savedSpawnMode = multiSessionEnabled
+    ? getCurrentProjectConfig().remoteControlSpawnMode
+    : undefined
+  if (savedSpawnMode === 'worktree' && !worktreeAvailable) {
+    // biome-ignore lint/suspicious/noConsole: intentional warning output
+    console.error(
+      'Warning: Saved spawn mode is worktree but this directory is not a git repository. Falling back to same-dir.',
+    )
+    savedSpawnMode = undefined
+    saveCurrentProjectConfig(current => {
+      if (current.remoteControlSpawnMode === undefined) return current
+      return { ...current, remoteControlSpawnMode: undefined }
+    })
+  }
+
+  // First-run spawn-mode choice: ask once per project when the choice is
+  // meaningful (gate on, both modes available, no explicit override, not
+  // resuming). Saves to ProjectConfig so subsequent runs skip this.
+  if (
+    multiSessionEnabled &&
+    !savedSpawnMode &&
+    worktreeAvailable &&
+    parsedSpawnMode === undefined &&
+    !resumeSessionId &&
+    process.stdin.isTTY
+  ) {
+    const readline = await import('readline')
+    const rl = readline.createInterface({
+      input: process.stdin,
+      output: process.stdout,
+    })
+    // biome-ignore lint/suspicious/noConsole: intentional dialog output
+    console.log(
+      `\nClaude Remote Control is launching in spawn mode which lets you create new sessions in this project from Claude Code on Web or your Mobile app. Learn more here: https://code.claude.com/docs/en/remote-control\n\n` +
+        `Spawn mode for this project:\n` +
+        `  [1] same-dir \u2014 sessions share the current directory (default)\n` +
+        `  [2] worktree \u2014 each session gets an isolated git worktree\n\n` +
+        `This can be changed later or explicitly set with --spawn=same-dir or --spawn=worktree.\n`,
+    )
+    const answer = await new Promise<string>(resolve => {
+      rl.question('Choose [1/2] (default: 1): ', resolve)
+    })
+    rl.close()
+    const chosen: 'same-dir' | 'worktree' =
+      answer.trim() === '2' ? 'worktree' : 'same-dir'
+    savedSpawnMode = chosen
+    logEvent('tengu_bridge_spawn_mode_chosen', {
+      spawn_mode:
+        chosen as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+    saveCurrentProjectConfig(current => {
+      if (current.remoteControlSpawnMode === chosen) return current
+      return { ...current, remoteControlSpawnMode: chosen }
+    })
+  }
+
+  // Determine effective spawn mode.
+  // Precedence: resume > explicit --spawn > saved project pref > gate default
+  // - resuming via --continue / --session-id: always single-session (resume
+  //   targets one specific session in its original directory)
+  // - explicit --spawn flag: use that value directly (does not persist)
+  // - saved ProjectConfig.remoteControlSpawnMode: set by first-run dialog or `w`
+  // - default with gate on: same-dir (persistent multi-session, shared cwd)
+  // - default with gate off: single-session (unchanged legacy behavior)
+  // Track how spawn mode was determined, for rollout analytics.
+  type SpawnModeSource = 'resume' | 'flag' | 'saved' | 'gate_default'
+  let spawnModeSource: SpawnModeSource
+  let spawnMode: SpawnMode
+  if (resumeSessionId) {
+    spawnMode = 'single-session'
+    spawnModeSource = 'resume'
+  } else if (parsedSpawnMode !== undefined) {
+    spawnMode = parsedSpawnMode
+    spawnModeSource = 'flag'
+  } else if (savedSpawnMode !== undefined) {
+    spawnMode = savedSpawnMode
+    spawnModeSource = 'saved'
+  } else {
+    spawnMode = multiSessionEnabled ? 'same-dir' : 'single-session'
+    spawnModeSource = 'gate_default'
+  }
+  const maxSessions =
+    spawnMode === 'single-session'
+      ? 1
+      : (parsedCapacity ?? SPAWN_SESSIONS_DEFAULT)
+  // Pre-create an empty session on start so the user has somewhere to type
+  // immediately, running in the current directory (exempted from worktree
+  // creation in the spawn loop). On by default; --no-create-session-in-dir
+  // opts out for a pure on-demand server where every session is isolated.
+  // The effectiveResumeSessionId guard at the creation site handles the
+  // resume case (skip creation when resume succeeded; fall through to
+  // fresh creation on env-mismatch fallback).
+  const preCreateSession = parsedCreateSessionInDir ?? true
+
+  // Without --continue: a leftover pointer means the previous run didn't
+  // shut down cleanly (crash, kill -9, terminal closed). Clear it so the
+  // stale env doesn't linger past its relevance. Runs in all modes
+  // (clearBridgePointer is a no-op when no file exists) — covers the
+  // gate-transition case where a user crashed in single-session mode then
+  // starts fresh in worktree mode. Only single-session mode writes new
+  // pointers.
+  if (!resumeSessionId) {
+    const { clearBridgePointer } = await import('./bridgePointer.js')
+    await clearBridgePointer(dir)
+  }
+
+  // Worktree mode requires either git or WorktreeCreate/WorktreeRemove hooks.
+  // Only reachable via explicit --spawn=worktree (default is same-dir);
+  // saved worktree pref was already guarded above.
+  if (spawnMode === 'worktree' && !worktreeAvailable) {
+    // biome-ignore lint/suspicious/noConsole: intentional error output
+    console.error(
+      `Error: Worktree mode requires a git repository or WorktreeCreate hooks configured. Use --spawn=session for single-session mode.`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  const branch = await getBranch()
+  const gitRepoUrl = await getRemoteUrl()
+  const machineName = hostname()
+  const bridgeId = randomUUID()
+
+  const { handleOAuth401Error } = await import('../utils/auth.js')
+  const api = createBridgeApiClient({
+    baseUrl,
+    getAccessToken: getBridgeAccessToken,
+    runnerVersion: MACRO.VERSION,
+    onDebug: logForDebugging,
+    onAuth401: handleOAuth401Error,
+    getTrustedDeviceToken,
+  })
+
+  // When resuming a session via --session-id, fetch it to learn its
+  // environment_id and reuse that for registration (idempotent on the
+  // backend). Left undefined otherwise — the backend rejects
+  // client-generated UUIDs and will allocate a fresh environment.
+  // feature('KAIROS') gate: --session-id is ant-only; parseArgs already
+  // rejects the flag when the gate is off, so resumeSessionId is always
+  // undefined here in external builds — this guard is for tree-shaking.
+  let reuseEnvironmentId: string | undefined
+  if (feature('KAIROS') && resumeSessionId) {
+    try {
+      validateBridgeId(resumeSessionId, 'sessionId')
+    } catch {
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Invalid session ID "${resumeSessionId}". Session IDs must not contain unsafe characters.`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    // Proactively refresh the OAuth token — getBridgeSession uses raw axios
+    // without the withOAuthRetry 401-refresh logic. An expired-but-present
+    // token would otherwise produce a misleading "not found" error.
+    await checkAndRefreshOAuthTokenIfNeeded()
+    clearOAuthTokenCache()
+    const { getBridgeSession } = await import('./createSession.js')
+    const session = await getBridgeSession(resumeSessionId, {
+      baseUrl,
+      getAccessToken: getBridgeAccessToken,
+    })
+    if (!session) {
+      // Session gone on server → pointer is stale. Clear it so the user
+      // isn't re-prompted next launch. (Explicit --session-id leaves the
+      // pointer alone — it's an independent file they may not even have.)
+      // resumePointerDir may be a worktree sibling — clear THAT file.
+      if (resumePointerDir) {
+        const { clearBridgePointer } = await import('./bridgePointer.js')
+        await clearBridgePointer(resumePointerDir)
+      }
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Session ${resumeSessionId} not found. It may have been archived or expired, or your login may have lapsed (run \`claude /login\`).`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    if (!session.environment_id) {
+      if (resumePointerDir) {
+        const { clearBridgePointer } = await import('./bridgePointer.js')
+        await clearBridgePointer(resumePointerDir)
+      }
+      // biome-ignore lint/suspicious/noConsole: intentional error output
+      console.error(
+        `Error: Session ${resumeSessionId} has no environment_id. It may never have been attached to a bridge.`,
+      )
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+    reuseEnvironmentId = session.environment_id
+    logForDebugging(
+      `[bridge:init] Resuming session ${resumeSessionId} on environment ${reuseEnvironmentId}`,
+    )
+  }
+
+  const config: BridgeConfig = {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    maxSessions,
+    spawnMode,
+    verbose,
+    sandbox,
+    bridgeId,
+    workerType: 'claude_code',
+    environmentId: randomUUID(),
+    reuseEnvironmentId,
+    apiBaseUrl: baseUrl,
+    sessionIngressUrl,
+    debugFile,
+    sessionTimeoutMs,
+  }
+
+  logForDebugging(
+    `[bridge:init] bridgeId=${bridgeId}${reuseEnvironmentId ? ` reuseEnvironmentId=${reuseEnvironmentId}` : ''} dir=${dir} branch=${branch} gitRepoUrl=${gitRepoUrl} machine=${machineName}`,
+  )
+  logForDebugging(
+    `[bridge:init] apiBaseUrl=${baseUrl} sessionIngressUrl=${sessionIngressUrl}`,
+  )
+  logForDebugging(
+    `[bridge:init] sandbox=${sandbox}${debugFile ? ` debugFile=${debugFile}` : ''}`,
+  )
+
+  // Register the bridge environment before entering the poll loop.
+  let environmentId: string
+  let environmentSecret: string
+  try {
+    const reg = await api.registerBridgeEnvironment(config)
+    environmentId = reg.environment_id
+    environmentSecret = reg.environment_secret
+  } catch (err) {
+    logEvent('tengu_bridge_registration_failed', {
+      status: err instanceof BridgeFatalError ? err.status : undefined,
+    })
+    // Registration failures are fatal — print a clean message instead of a stack trace.
+    // biome-ignore lint/suspicious/noConsole: intentional console output
+    console.error(
+      err instanceof BridgeFatalError && err.status === 404
+        ? 'Remote Control environments are not available for your account.'
+        : `Error: ${errorMessage(err)}`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(1)
+  }
+
+  // Tracks whether the --session-id resume flow completed successfully.
+  // Used below to skip fresh session creation and seed initialSessionId.
+  // Cleared on env mismatch so we gracefully fall back to a new session.
+  let effectiveResumeSessionId: string | undefined
+  if (feature('KAIROS') && resumeSessionId) {
+    if (reuseEnvironmentId && environmentId !== reuseEnvironmentId) {
+      // Backend returned a different environment_id — the original env
+      // expired or was reaped. Reconnect won't work against the new env
+      // (session is bound to the old one). Log to sentry for visibility
+      // and fall through to fresh session creation on the new env.
+      logError(
+        new Error(
+          `Bridge resume env mismatch: requested ${reuseEnvironmentId}, backend returned ${environmentId}. Falling back to fresh session.`,
+        ),
+      )
+      // biome-ignore lint/suspicious/noConsole: intentional warning output
+      console.warn(
+        `Warning: Could not resume session ${resumeSessionId} — its environment has expired. Creating a fresh session instead.`,
+      )
+      // Don't deregister — we're going to use this new environment.
+      // effectiveResumeSessionId stays undefined → fresh session path below.
+    } else {
+      // Force-stop any stale worker instances for this session and re-queue
+      // it so our poll loop picks it up. Must happen after registration so
+      // the backend knows a live worker exists for the environment.
+      //
+      // The pointer stores a session_* ID but /bridge/reconnect looks
+      // sessions up by their infra tag (cse_*) when ccr_v2_compat_enabled
+      // is on. Try both; the conversion is a no-op if already cse_*.
+      const infraResumeId = toInfraSessionId(resumeSessionId)
+      const reconnectCandidates =
+        infraResumeId === resumeSessionId
+          ? [resumeSessionId]
+          : [resumeSessionId, infraResumeId]
+      let reconnected = false
+      let lastReconnectErr: unknown
+      for (const candidateId of reconnectCandidates) {
+        try {
+          await api.reconnectSession(environmentId, candidateId)
+          logForDebugging(
+            `[bridge:init] Session ${candidateId} re-queued via bridge/reconnect`,
+          )
+          effectiveResumeSessionId = resumeSessionId
+          reconnected = true
+          break
+        } catch (err) {
+          lastReconnectErr = err
+          logForDebugging(
+            `[bridge:init] reconnectSession(${candidateId}) failed: ${errorMessage(err)}`,
+          )
+        }
+      }
+      if (!reconnected) {
+        const err = lastReconnectErr
+
+        // Do NOT deregister on transient reconnect failure — at this point
+        // environmentId IS the session's own environment. Deregistering
+        // would make retry impossible. The backend's 4h TTL cleans up.
+        const isFatal = err instanceof BridgeFatalError
+        // Clear pointer only on fatal reconnect failure. Transient failures
+        // ("try running the same command again") should keep the pointer so
+        // next launch re-prompts — that IS the retry mechanism.
+        if (resumePointerDir && isFatal) {
+          const { clearBridgePointer } = await import('./bridgePointer.js')
+          await clearBridgePointer(resumePointerDir)
+        }
+        // biome-ignore lint/suspicious/noConsole: intentional error output
+        console.error(
+          isFatal
+            ? `Error: ${errorMessage(err)}`
+            : `Error: Failed to reconnect session ${resumeSessionId}: ${errorMessage(err)}\nThe session may still be resumable — try running the same command again.`,
+        )
+        // eslint-disable-next-line custom-rules/no-process-exit
+        process.exit(1)
+      }
+    }
+  }
+
+  logForDebugging(
+    `[bridge:init] Registered, server environmentId=${environmentId}`,
+  )
+  const startupPollConfig = getPollIntervalConfig()
+  logEvent('tengu_bridge_started', {
+    max_sessions: config.maxSessions,
+    has_debug_file: !!config.debugFile,
+    sandbox: config.sandbox,
+    verbose: config.verbose,
+    heartbeat_interval_ms:
+      startupPollConfig.non_exclusive_heartbeat_interval_ms,
+    spawn_mode:
+      config.spawnMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    spawn_mode_source:
+      spawnModeSource as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    multi_session_gate: multiSessionEnabled,
+    pre_create_session: preCreateSession,
+    worktree_available: worktreeAvailable,
+  })
+  logForDiagnosticsNoPII('info', 'bridge_started', {
+    max_sessions: config.maxSessions,
+    sandbox: config.sandbox,
+    spawn_mode: config.spawnMode,
+  })
+
+  const spawner = createSessionSpawner({
+    execPath: process.execPath,
+    scriptArgs: spawnScriptArgs(),
+    env: process.env,
+    verbose,
+    sandbox,
+    debugFile,
+    permissionMode,
+    onDebug: logForDebugging,
+    onActivity: (sessionId, activity) => {
+      logForDebugging(
+        `[bridge:activity] sessionId=${sessionId} ${activity.type} ${activity.summary}`,
+      )
+    },
+    onPermissionRequest: (sessionId, request, _accessToken) => {
+      logForDebugging(
+        `[bridge:perm] sessionId=${sessionId} tool=${request.request.tool_name} request_id=${request.request_id} (not auto-approving)`,
+      )
+    },
+  })
+
+  const logger = createBridgeLogger({ verbose })
+  const { parseGitHubRepository } = await import('../utils/detectRepository.js')
+  const ownerRepo = gitRepoUrl ? parseGitHubRepository(gitRepoUrl) : null
+  // Use the repo name from the parsed owner/repo, or fall back to the dir basename
+  const repoName = ownerRepo ? ownerRepo.split('/').pop()! : basename(dir)
+  logger.setRepoInfo(repoName, branch)
+
+  // `w` toggle is available iff we're in a multi-session mode AND worktree
+  // is a valid option. When unavailable, the mode suffix and hint are hidden.
+  const toggleAvailable = spawnMode !== 'single-session' && worktreeAvailable
+  if (toggleAvailable) {
+    // Safe cast: spawnMode is not single-session (checked above), and the
+    // saved-worktree-in-non-git guard + exit check above ensure worktree
+    // is only reached when available.
+    logger.setSpawnModeDisplay(spawnMode as 'same-dir' | 'worktree')
+  }
+
+  // Listen for keys: space toggles QR code, w toggles spawn mode
+  const onStdinData = (data: Buffer): void => {
+    if (data[0] === 0x03 || data[0] === 0x04) {
+      // Ctrl+C / Ctrl+D — trigger graceful shutdown
+      process.emit('SIGINT')
+      return
+    }
+    if (data[0] === 0x20 /* space */) {
+      logger.toggleQr()
+      return
+    }
+    if (data[0] === 0x77 /* 'w' */) {
+      if (!toggleAvailable) return
+      const newMode: 'same-dir' | 'worktree' =
+        config.spawnMode === 'same-dir' ? 'worktree' : 'same-dir'
+      config.spawnMode = newMode
+      logEvent('tengu_bridge_spawn_mode_toggled', {
+        spawn_mode:
+          newMode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+      logger.logStatus(
+        newMode === 'worktree'
+          ? 'Spawn mode: worktree (new sessions get isolated git worktrees)'
+          : 'Spawn mode: same-dir (new sessions share the current directory)',
+      )
+      logger.setSpawnModeDisplay(newMode)
+      logger.refreshDisplay()
+      saveCurrentProjectConfig(current => {
+        if (current.remoteControlSpawnMode === newMode) return current
+        return { ...current, remoteControlSpawnMode: newMode }
+      })
+      return
+    }
+  }
+  if (process.stdin.isTTY) {
+    process.stdin.setRawMode(true)
+    process.stdin.resume()
+    process.stdin.on('data', onStdinData)
+  }
+
+  const controller = new AbortController()
+  const onSigint = (): void => {
+    logForDebugging('[bridge:shutdown] SIGINT received, shutting down')
+    controller.abort()
+  }
+  const onSigterm = (): void => {
+    logForDebugging('[bridge:shutdown] SIGTERM received, shutting down')
+    controller.abort()
+  }
+  process.on('SIGINT', onSigint)
+  process.on('SIGTERM', onSigterm)
+
+  // Auto-create an empty session so the user has somewhere to type
+  // immediately (matching /remote-control behavior). Controlled by
+  // preCreateSession: on by default; --no-create-session-in-dir opts out.
+  // When a --session-id resume succeeded, skip creation entirely — the
+  // session already exists and bridge/reconnect has re-queued it.
+  // When resume was requested but failed on env mismatch, effectiveResumeSessionId
+  // is undefined, so we fall through to fresh session creation (honoring the
+  // "Creating a fresh session instead" warning printed above).
+  let initialSessionId: string | null =
+    feature('KAIROS') && effectiveResumeSessionId
+      ? effectiveResumeSessionId
+      : null
+  if (preCreateSession && !(feature('KAIROS') && effectiveResumeSessionId)) {
+    const { createBridgeSession } = await import('./createSession.js')
+    try {
+      initialSessionId = await createBridgeSession({
+        environmentId,
+        title: name,
+        events: [],
+        gitRepoUrl,
+        branch,
+        signal: controller.signal,
+        baseUrl,
+        getAccessToken: getBridgeAccessToken,
+        permissionMode,
+      })
+      if (initialSessionId) {
+        logForDebugging(
+          `[bridge:init] Created initial session ${initialSessionId}`,
+        )
+      }
+    } catch (err) {
+      logForDebugging(
+        `[bridge:init] Session creation failed (non-fatal): ${errorMessage(err)}`,
+      )
+    }
+  }
+
+  // Crash-recovery pointer: write immediately so kill -9 at any point
+  // after this leaves a recoverable trail. Covers both fresh sessions and
+  // resumed ones (so a second crash after resume is still recoverable).
+  // Cleared when runBridgeLoop falls through to archive+deregister; left in
+  // place on the SIGINT resumable-shutdown return (backup for when the user
+  // closes the terminal before copying the printed --session-id hint).
+  // Refreshed hourly so a 5h+ session that crashes still has a fresh
+  // pointer (staleness checks file mtime, backend TTL is rolling-from-poll).
+  let pointerRefreshTimer: ReturnType<typeof setInterval> | null = null
+  // Single-session only: --continue forces single-session mode on resume,
+  // so a pointer written in multi-session mode would contradict the user's
+  // config when they try to resume. The resumable-shutdown path is also
+  // gated to single-session (line ~1254) so the pointer would be orphaned.
+  if (initialSessionId && spawnMode === 'single-session') {
+    const { writeBridgePointer } = await import('./bridgePointer.js')
+    const pointerPayload = {
+      sessionId: initialSessionId,
+      environmentId,
+      source: 'standalone' as const,
+    }
+    await writeBridgePointer(config.dir, pointerPayload)
+    pointerRefreshTimer = setInterval(
+      writeBridgePointer,
+      60 * 60 * 1000,
+      config.dir,
+      pointerPayload,
+    )
+    // Don't let the interval keep the process alive on its own.
+    pointerRefreshTimer.unref?.()
+  }
+
+  try {
+    await runBridgeLoop(
+      config,
+      environmentId,
+      environmentSecret,
+      api,
+      spawner,
+      logger,
+      controller.signal,
+      undefined,
+      initialSessionId ?? undefined,
+      async () => {
+        // Clear the memoized OAuth token cache so we re-read from secure
+        // storage, picking up tokens refreshed by child processes.
+        clearOAuthTokenCache()
+        // Proactively refresh the token if it's expired on disk too.
+        await checkAndRefreshOAuthTokenIfNeeded()
+        return getBridgeAccessToken()
+      },
+    )
+  } finally {
+    if (pointerRefreshTimer !== null) {
+      clearInterval(pointerRefreshTimer)
+    }
+    process.off('SIGINT', onSigint)
+    process.off('SIGTERM', onSigterm)
+    process.stdin.off('data', onStdinData)
+    if (process.stdin.isTTY) {
+      process.stdin.setRawMode(false)
+    }
+    process.stdin.pause()
+  }
+
+  // The bridge bypasses init.ts (and its graceful shutdown handler), so we
+  // must exit explicitly.
+  // eslint-disable-next-line custom-rules/no-process-exit
+  process.exit(0)
+}
+
+// ─── Headless bridge (daemon worker) ────────────────────────────────────────
+
+/**
+ * Raised by runBridgeHeadless for configuration problems that retrying
+ * cannot fix (workspace trust not accepted, worktree mode without git,
+ * plain-HTTP base URL). The daemon worker maps this to EXIT_CODE_PERMANENT
+ * so the supervisor parks the worker instead of respawning it on backoff.
+ */
+export class BridgeHeadlessPermanentError extends Error {
+  constructor(message: string) {
+    super(message)
+    this.name = BridgeHeadlessPermanentError.name
+  }
+}
+
+/** Caller-supplied configuration for runBridgeHeadless (sourced from the
+ *  daemon's config plus IPC-provided auth callbacks). */
+export type HeadlessBridgeOpts = {
+  /** Workspace directory: the worker chdirs here and registers it as the bridge dir. */
+  dir: string
+  /** Optional title for the pre-created session (passed to createBridgeSession). */
+  name?: string
+  /** Session spawn strategy; 'worktree' requires a git repo or WorktreeCreate hook. */
+  spawnMode: 'same-dir' | 'worktree'
+  /** Maximum concurrent sessions (becomes BridgeConfig.maxSessions). */
+  capacity: number
+  /** Permission mode forwarded to spawned sessions and session creation. */
+  permissionMode?: string
+  /** Whether spawned sessions run sandboxed. */
+  sandbox: boolean
+  /** Optional per-session timeout forwarded into BridgeConfig. */
+  sessionTimeoutMs?: number
+  /** When true, pre-create an empty session right after registration. */
+  createSessionOnStart: boolean
+  /** Returns the current OAuth access token, or undefined when logged out. */
+  getAccessToken: () => string | undefined
+  /** Invoked on HTTP 401 from the bridge API; resolves true if auth was recovered. */
+  onAuth401: (failedToken: string) => Promise<boolean>
+  /** Line-oriented logger; all bridge output goes through this single fn. */
+  log: (s: string) => void
+}
+
+/**
+ * Non-interactive bridge entrypoint for the `remoteControl` daemon worker.
+ *
+ * Linear subset of bridgeMain(): no readline dialogs, no stdin key handlers,
+ * no TUI, no process.exit(). Config comes from the caller (daemon.json), auth
+ * comes via IPC (supervisor's AuthManager), logs go to the worker's stdout
+ * pipe. Throws on fatal errors — the worker catches and maps permanent vs
+ * transient to the right exit code.
+ *
+ * Throws BridgeHeadlessPermanentError for non-retryable configuration
+ * problems; plain Error for transient failures the supervisor may retry.
+ *
+ * Resolves cleanly when `signal` aborts and the poll loop tears down.
+ */
+export async function runBridgeHeadless(
+  opts: HeadlessBridgeOpts,
+  signal: AbortSignal,
+): Promise<void> {
+  const { dir, log } = opts
+
+  // Worker inherits the supervisor's CWD. chdir first so git utilities
+  // (getBranch/getRemoteUrl) — which read from bootstrap CWD state set
+  // below — resolve against the right repo.
+  process.chdir(dir)
+  const { setOriginalCwd, setCwdState } = await import('../bootstrap/state.js')
+  setOriginalCwd(dir)
+  setCwdState(dir)
+
+  // Bring up config access and log sinks before the trust/auth checks below
+  // read from them.
+  const { enableConfigs, checkHasTrustDialogAccepted } = await import(
+    '../utils/config.js'
+  )
+  enableConfigs()
+  const { initSinks } = await import('../utils/sinks.js')
+  initSinks()
+
+  // Permanent: trust can only be granted interactively, so respawning the
+  // worker would never help.
+  if (!checkHasTrustDialogAccepted()) {
+    throw new BridgeHeadlessPermanentError(
+      `Workspace not trusted: ${dir}. Run \`claude\` in that directory first to accept the trust dialog.`,
+    )
+  }
+
+  if (!opts.getAccessToken()) {
+    // Transient — supervisor's AuthManager may pick up a token on next cycle.
+    throw new Error(BRIDGE_LOGIN_ERROR)
+  }
+
+  // Security: refuse plain-HTTP endpoints except local development hosts.
+  const { getBridgeBaseUrl } = await import('./bridgeConfig.js')
+  const baseUrl = getBridgeBaseUrl()
+  if (
+    baseUrl.startsWith('http://') &&
+    !baseUrl.includes('localhost') &&
+    !baseUrl.includes('127.0.0.1')
+  ) {
+    throw new BridgeHeadlessPermanentError(
+      'Remote Control base URL uses HTTP. Only HTTPS or localhost HTTP is allowed.',
+    )
+  }
+  // Ant-internal env override for the session ingress endpoint; everyone
+  // else uses the API base URL.
+  const sessionIngressUrl =
+    process.env.USER_TYPE === 'ant' &&
+    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      : baseUrl
+
+  const { getBranch, getRemoteUrl, findGitRoot } = await import(
+    '../utils/git.js'
+  )
+  const { hasWorktreeCreateHook } = await import('../utils/hooks.js')
+
+  // Worktree spawn mode needs either a git repo or a WorktreeCreate hook;
+  // neither can appear without user action, so this is permanent.
+  if (opts.spawnMode === 'worktree') {
+    const worktreeAvailable =
+      hasWorktreeCreateHook() || findGitRoot(dir) !== null
+    if (!worktreeAvailable) {
+      throw new BridgeHeadlessPermanentError(
+        `Worktree mode requires a git repository or WorktreeCreate hooks. Directory ${dir} has neither.`,
+      )
+    }
+  }
+
+  // Registration identity: repo coordinates plus a fresh per-invocation
+  // bridge ID.
+  const branch = await getBranch()
+  const gitRepoUrl = await getRemoteUrl()
+  const machineName = hostname()
+  const bridgeId = randomUUID()
+
+  // environmentId here is a client-side placeholder; the registration
+  // response below supplies the authoritative server-assigned ID.
+  const config: BridgeConfig = {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    maxSessions: opts.capacity,
+    spawnMode: opts.spawnMode,
+    verbose: false,
+    sandbox: opts.sandbox,
+    bridgeId,
+    workerType: 'claude_code',
+    environmentId: randomUUID(),
+    apiBaseUrl: baseUrl,
+    sessionIngressUrl,
+    sessionTimeoutMs: opts.sessionTimeoutMs,
+  }
+
+  const api = createBridgeApiClient({
+    baseUrl,
+    getAccessToken: opts.getAccessToken,
+    runnerVersion: MACRO.VERSION,
+    onDebug: log,
+    onAuth401: opts.onAuth401,
+    getTrustedDeviceToken,
+  })
+
+  // Register the bridge environment before entering the poll loop.
+  let environmentId: string
+  let environmentSecret: string
+  try {
+    const reg = await api.registerBridgeEnvironment(config)
+    environmentId = reg.environment_id
+    environmentSecret = reg.environment_secret
+  } catch (err) {
+    // Transient — let supervisor backoff-retry.
+    throw new Error(`Bridge registration failed: ${errorMessage(err)}`)
+  }
+
+  // Sessions are spawned with this worker's own binary and environment.
+  const spawner = createSessionSpawner({
+    execPath: process.execPath,
+    scriptArgs: spawnScriptArgs(),
+    env: process.env,
+    verbose: false,
+    sandbox: opts.sandbox,
+    permissionMode: opts.permissionMode,
+    onDebug: log,
+  })
+
+  const logger = createHeadlessBridgeLogger(log)
+  logger.printBanner(config, environmentId)
+
+  // Optionally pre-create an empty session (headless analogue of the
+  // interactive bridge's preCreateSession). Failures are non-fatal.
+  let initialSessionId: string | undefined
+  if (opts.createSessionOnStart) {
+    const { createBridgeSession } = await import('./createSession.js')
+    try {
+      const sid = await createBridgeSession({
+        environmentId,
+        title: opts.name,
+        events: [],
+        gitRepoUrl,
+        branch,
+        signal,
+        baseUrl,
+        getAccessToken: opts.getAccessToken,
+        permissionMode: opts.permissionMode,
+      })
+      if (sid) {
+        initialSessionId = sid
+        log(`created initial session ${sid}`)
+      }
+    } catch (err) {
+      log(`session pre-creation failed (non-fatal): ${errorMessage(err)}`)
+    }
+  }
+
+  // Poll loop runs until `signal` aborts, then resolves cleanly.
+  await runBridgeLoop(
+    config,
+    environmentId,
+    environmentSecret,
+    api,
+    spawner,
+    logger,
+    signal,
+    undefined,
+    initialSessionId,
+    async () => opts.getAccessToken(),
+  )
+}
+
+/** BridgeLogger adapter that routes everything to a single line-log fn. */
+function createHeadlessBridgeLogger(log: (s: string) => void): BridgeLogger {
+  const noop = (): void => {}
+  return {
+    printBanner: (cfg, envId) =>
+      log(
+        `registered environmentId=${envId} dir=${cfg.dir} spawnMode=${cfg.spawnMode} capacity=${cfg.maxSessions}`,
+      ),
+    logSessionStart: (id, _prompt) => log(`session start ${id}`),
+    logSessionComplete: (id, ms) => log(`session complete ${id} (${ms}ms)`),
+    logSessionFailed: (id, err) => log(`session failed ${id}: ${err}`),
+    logStatus: log,
+    logVerbose: log,
+    logError: s => log(`error: ${s}`),
+    logReconnected: ms => log(`reconnected after ${ms}ms`),
+    addSession: (id, _url) => log(`session attached ${id}`),
+    removeSession: id => log(`session detached ${id}`),
+    updateIdleStatus: noop,
+    updateReconnectingStatus: noop,
+    updateSessionStatus: noop,
+    updateSessionActivity: noop,
+    updateSessionCount: noop,
+    updateFailedStatus: noop,
+    setSpawnModeDisplay: noop,
+    setRepoInfo: noop,
+    setDebugLogPath: noop,
+    setAttached: noop,
+    setSessionTitle: noop,
+    clearStatus: noop,
+    toggleQr: noop,
+    refreshDisplay: noop,
+  }
+}

+ 461 - 0
src/bridge/bridgeMessaging.ts

@@ -0,0 +1,461 @@
+/**
+ * Shared transport-layer helpers for bridge message handling.
+ *
+ * Extracted from replBridge.ts so both the env-based core (initBridgeCore)
+ * and the env-less core (initEnvLessBridgeCore) can use the same ingress
+ * parsing, control-request handling, and echo-dedup machinery.
+ *
+ * Everything here is pure — no closure over bridge-specific state. All
+ * collaborators (transport, sessionId, UUID sets, callbacks) are passed
+ * as params.
+ */
+
+import { randomUUID } from 'crypto'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+} from '../entrypoints/sdk/controlTypes.js'
+import type { SDKResultSuccess } from '../entrypoints/sdk/coreTypes.js'
+import { logEvent } from '../services/analytics/index.js'
+import { EMPTY_USAGE } from '../services/api/emptyUsage.js'
+import type { Message } from '../types/message.js'
+import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
+import { logForDebugging } from '../utils/debug.js'
+import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
+import { errorMessage } from '../utils/errors.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+import { jsonParse } from '../utils/slowOperations.js'
+import type { ReplBridgeTransport } from './replBridgeTransport.js'
+
+// ─── Type guards ─────────────────────────────────────────────────────────────
+
+/** Type predicate for parsed WebSocket messages. SDKMessage is a
+ *  discriminated union on `type` — a string discriminant is all the
+ *  predicate checks; callers narrow further through the union itself. */
+export function isSDKMessage(value: unknown): value is SDKMessage {
+  if (typeof value !== 'object' || value === null) return false
+  return 'type' in value && typeof value.type === 'string'
+}
+
+/** Type predicate for control_response messages from the server. */
+export function isSDKControlResponse(
+  value: unknown,
+): value is SDKControlResponse {
+  if (value === null || typeof value !== 'object') return false
+  if (!('type' in value) || value.type !== 'control_response') return false
+  return 'response' in value
+}
+
+/** Type predicate for control_request messages from the server. */
+export function isSDKControlRequest(
+  value: unknown,
+): value is SDKControlRequest {
+  if (value === null || typeof value !== 'object') return false
+  if (!('type' in value) || value.type !== 'control_request') return false
+  return 'request_id' in value && 'request' in value
+}
+
+/**
+ * True for message types that should be forwarded to the bridge transport.
+ * The server only wants user/assistant turns and slash-command system events;
+ * everything else (tool_result, progress, etc.) is internal REPL chatter.
+ */
+export function isEligibleBridgeMessage(m: Message): boolean {
+  // Virtual messages (REPL inner calls) are display-only — bridge/SDK
+  // consumers see the REPL tool_use/result which summarizes the work.
+  if ((m.type === 'user' || m.type === 'assistant') && m.isVirtual) {
+    return false
+  }
+  return (
+    m.type === 'user' ||
+    m.type === 'assistant' ||
+    (m.type === 'system' && m.subtype === 'local_command')
+  )
+}
+
+/**
+ * Pull title-worthy text out of a Message for onUserMessage. Yields
+ * undefined for anything that shouldn't title the session: non-user
+ * messages, meta nudges, tool results, compact summaries, non-human
+ * origins (task notifications, channel messages), or content that is
+ * nothing but display tags (<ide_opened_file>, <session-start-hook>, …).
+ *
+ * Synthetic interrupts ([Request interrupted by user]) are deliberately
+ * NOT filtered here — isSyntheticMessage lives in messages.ts (a heavy
+ * import that pulls the command registry). The initialMessages path in
+ * initReplBridge checks it; an interrupt arriving as the *first* message
+ * on the writeMessages path is implausible, since an interrupt implies a
+ * prior prompt already flowed through.
+ */
+export function extractTitleText(m: Message): string | undefined {
+  // Only plain human-authored user prompts may title the session.
+  if (m.type !== 'user') return undefined
+  if (m.isMeta || m.toolUseResult || m.isCompactSummary) return undefined
+  if (m.origin && m.origin.kind !== 'human') return undefined
+
+  // First text payload: the content itself when it's a bare string,
+  // otherwise the first text block in the content array.
+  const content = m.message.content
+  let text: string | undefined
+  if (typeof content === 'string') {
+    text = content
+  } else {
+    for (const part of content) {
+      if (part.type === 'text') {
+        text = part.text
+        break
+      }
+    }
+  }
+  if (!text) return undefined
+
+  // Strip display-only tags; an all-tag prompt produces no title.
+  const stripped = stripDisplayTagsAllowEmpty(text)
+  return stripped || undefined
+}
+
+// ─── Ingress routing ─────────────────────────────────────────────────────────
+
+/**
+ * Parse one ingress WebSocket message and route it to the matching handler.
+ * Drops messages whose UUID appears in recentPostedUUIDs (echoes of our own
+ * sends) or in recentInboundUUIDs (re-deliveries already forwarded — e.g.
+ * the server replayed history after a transport swap lost the seq cursor).
+ */
+export function handleIngressMessage(
+  data: string,
+  recentPostedUUIDs: BoundedUUIDSet,
+  recentInboundUUIDs: BoundedUUIDSet,
+  onInboundMessage: ((msg: SDKMessage) => void | Promise<void>) | undefined,
+  onPermissionResponse?: ((response: SDKControlResponse) => void) | undefined,
+  onControlRequest?: ((request: SDKControlRequest) => void) | undefined,
+): void {
+  // The try spans the whole body: parse failures AND synchronous handler
+  // throws are both reported the same way rather than escaping the caller.
+  try {
+    const msg: unknown = normalizeControlMessageKeys(jsonParse(data))
+
+    // control_response is not an SDKMessage — check before the type guard
+    if (isSDKControlResponse(msg)) {
+      logForDebugging('[bridge:repl] Ingress message type=control_response')
+      onPermissionResponse?.(msg)
+      return
+    }
+
+    // control_request from the server (initialize, set_model, can_use_tool).
+    // Must respond promptly or the server kills the WS (~10-14s timeout).
+    if (isSDKControlRequest(msg)) {
+      logForDebugging(
+        `[bridge:repl] Inbound control_request subtype=${msg.request.subtype}`,
+      )
+      onControlRequest?.(msg)
+      return
+    }
+
+    if (!isSDKMessage(msg)) return
+
+    // UUID lets us recognize echoes of our own messages and re-deliveries.
+    const uuid =
+      'uuid' in msg && typeof msg.uuid === 'string' ? msg.uuid : undefined
+
+    if (uuid && recentPostedUUIDs.has(uuid)) {
+      logForDebugging(
+        `[bridge:repl] Ignoring echo: type=${msg.type} uuid=${uuid}`,
+      )
+      return
+    }
+
+    // Defensive dedup: drop inbound prompts we've already forwarded. The
+    // SSE seq-num carryover (lastTransportSequenceNum) is the primary fix
+    // for history-replay; this catches the edge cases where that
+    // negotiation fails (server ignores from_sequence_num, transport died
+    // before receiving any frames, etc).
+    if (uuid && recentInboundUUIDs.has(uuid)) {
+      logForDebugging(
+        `[bridge:repl] Ignoring re-delivered inbound: type=${msg.type} uuid=${uuid}`,
+      )
+      return
+    }
+
+    logForDebugging(
+      `[bridge:repl] Ingress message type=${msg.type}${uuid ? ` uuid=${uuid}` : ''}`,
+    )
+
+    if (msg.type !== 'user') {
+      logForDebugging(
+        `[bridge:repl] Ignoring non-user inbound message: type=${msg.type}`,
+      )
+      return
+    }
+
+    if (uuid) recentInboundUUIDs.add(uuid)
+    logEvent('tengu_bridge_message_received', {
+      is_repl: true,
+    })
+    // Fire-and-forget — handler may be async (attachment resolution).
+    void onInboundMessage?.(msg)
+  } catch (err) {
+    logForDebugging(
+      `[bridge:repl] Failed to parse ingress message: ${errorMessage(err)}`,
+    )
+  }
+}
+
+// ─── Server-initiated control requests ───────────────────────────────────────
+
+export type ServerControlRequestHandlers = {
+  /** Transport the control_responses are written to; when null, inbound
+   *  requests are dropped with a debug log instead of answered. */
+  transport: ReplBridgeTransport | null
+  /** Stamped onto each outgoing control_response event as session_id. */
+  sessionId: string
+  /**
+   * When true, all mutable requests (interrupt, set_model, set_permission_mode,
+   * set_max_thinking_tokens) reply with an error instead of false-success.
+   * initialize still replies success — the server kills the connection otherwise.
+   * Used by the outbound-only bridge mode and the SDK's /bridge subpath so claude.ai sees a
+   * proper error instead of "action succeeded but nothing happened locally".
+   */
+  outboundOnly?: boolean
+  // NOTE(review): presumably invoked for the `interrupt` subtype — that
+  // switch case is outside this view; confirm in handleServerControlRequest.
+  onInterrupt?: () => void
+  /** Receives the model from a set_model request (undefined when the
+   *  request carries no model — semantics of "undefined" TODO confirm). */
+  onSetModel?: (model: string | undefined) => void
+  /** Receives max_thinking_tokens from a set_max_thinking_tokens request
+   *  (null presumably means "no cap" — TODO confirm with callers). */
+  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
+  /** Returns a policy verdict; an { ok: false } result lets the dispatcher
+   *  send an error control_response instead of a false success. */
+  onSetPermissionMode?: (
+    mode: PermissionMode,
+  ) => { ok: true } | { ok: false; error: string }
+}
+
+// Error text sent back for mutable control_requests when outboundOnly is set.
+const OUTBOUND_ONLY_ERROR =
+  'This session is outbound-only. Enable Remote Control locally to allow inbound control.'
+
+/**
+ * Respond to inbound control_request messages from the server. The server
+ * sends these for session lifecycle events (initialize, set_model) and
+ * for turn-level coordination (interrupt, set_max_thinking_tokens). If we
+ * don't respond, the server hangs and kills the WS after ~10-14s.
+ *
+ * Previously a closure inside initBridgeCore's onWorkReceived; now takes
+ * collaborators as params so both cores can use it.
+ */
+export function handleServerControlRequest(
+  request: SDKControlRequest,
+  handlers: ServerControlRequestHandlers,
+): void {
+  const {
+    transport,
+    sessionId,
+    outboundOnly,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+  } = handlers
+  if (!transport) {
+    logForDebugging(
+      '[bridge:repl] Cannot respond to control_request: transport not configured',
+    )
+    return
+  }
+
+  let response: SDKControlResponse
+
+  // Outbound-only: reply error for mutable requests so claude.ai doesn't show
+  // false success. initialize must still succeed (server kills the connection
+  // if it doesn't — see comment above).
+  if (outboundOnly && request.request.subtype !== 'initialize') {
+    response = {
+      type: 'control_response',
+      response: {
+        subtype: 'error',
+        request_id: request.request_id,
+        error: OUTBOUND_ONLY_ERROR,
+      },
+    }
+    const event = { ...response, session_id: sessionId }
+    void transport.write(event)
+    logForDebugging(
+      `[bridge:repl] Rejected ${request.request.subtype} (outbound-only) request_id=${request.request_id}`,
+    )
+    return
+  }
+
+  switch (request.request.subtype) {
+    case 'initialize':
+      // Respond with minimal capabilities — the REPL handles
+      // commands, models, and account info itself.
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+          response: {
+            commands: [],
+            output_style: 'normal',
+            available_output_styles: ['normal'],
+            models: [],
+            account: {},
+            pid: process.pid,
+          },
+        },
+      }
+      break
+
+    case 'set_model':
+      onSetModel?.(request.request.model)
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+        },
+      }
+      break
+
+    case 'set_max_thinking_tokens':
+      onSetMaxThinkingTokens?.(request.request.max_thinking_tokens)
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+        },
+      }
+      break
+
+    case 'set_permission_mode': {
+      // The callback returns a policy verdict so we can send an error
+      // control_response without importing isAutoModeGateEnabled /
+      // isBypassPermissionsModeDisabled here (bootstrap-isolation). If no
+      // callback is registered (daemon context, which doesn't wire this —
+      // see daemonBridge.ts), return an error verdict rather than a silent
+      // false-success: the mode is never actually applied in that context,
+      // so success would lie to the client.
+      const verdict = onSetPermissionMode?.(request.request.mode) ?? {
+        ok: false,
+        error:
+          'set_permission_mode is not supported in this context (onSetPermissionMode callback not registered)',
+      }
+      if (verdict.ok) {
+        response = {
+          type: 'control_response',
+          response: {
+            subtype: 'success',
+            request_id: request.request_id,
+          },
+        }
+      } else {
+        response = {
+          type: 'control_response',
+          response: {
+            subtype: 'error',
+            request_id: request.request_id,
+            error: verdict.error,
+          },
+        }
+      }
+      break
+    }
+
+    case 'interrupt':
+      onInterrupt?.()
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'success',
+          request_id: request.request_id,
+        },
+      }
+      break
+
+    default:
+      // Unknown subtype — respond with error so the server doesn't
+      // hang waiting for a reply that never comes.
+      response = {
+        type: 'control_response',
+        response: {
+          subtype: 'error',
+          request_id: request.request_id,
+          error: `REPL bridge does not handle control_request subtype: ${request.request.subtype}`,
+        },
+      }
+  }
+
+  const event = { ...response, session_id: sessionId }
+  void transport.write(event)
+  logForDebugging(
+    `[bridge:repl] Sent control_response for ${request.request.subtype} request_id=${request.request_id} result=${response.response.subtype}`,
+  )
+}
+
+// ─── Result message (for session archival on teardown) ───────────────────────
+
/**
 * Build a minimal `SDKResultSuccess` message for session archival.
 * The server needs this event before a WS close to trigger archival.
 *
 * All counters and costs are zeroed and `result` is empty: this is a
 * synthetic teardown marker, not a real turn result. EMPTY_USAGE is spread
 * so callers can't mutate the shared constant through the message.
 */
export function makeResultMessage(sessionId: string): SDKResultSuccess {
  return {
    type: 'result',
    subtype: 'success',
    duration_ms: 0,
    duration_api_ms: 0,
    is_error: false,
    num_turns: 0,
    result: '',
    stop_reason: null,
    total_cost_usd: 0,
    usage: { ...EMPTY_USAGE },
    modelUsage: {},
    permission_denials: [],
    session_id: sessionId,
    // Fresh uuid per message so dedup layers treat each teardown as distinct.
    uuid: randomUUID(),
  }
}
+
+// ─── BoundedUUIDSet (echo-dedup ring buffer) ─────────────────────────────────
+
+/**
+ * FIFO-bounded set backed by a circular buffer. Evicts the oldest entry
+ * when capacity is reached, keeping memory usage constant at O(capacity).
+ *
+ * Messages are added in chronological order, so evicted entries are always
+ * the oldest. The caller relies on external ordering (the hook's
+ * lastWrittenIndexRef) as the primary dedup — this set is a secondary
+ * safety net for echo filtering and race-condition dedup.
+ */
+export class BoundedUUIDSet {
+  private readonly capacity: number
+  private readonly ring: (string | undefined)[]
+  private readonly set = new Set<string>()
+  private writeIdx = 0
+
+  constructor(capacity: number) {
+    this.capacity = capacity
+    this.ring = new Array<string | undefined>(capacity)
+  }
+
+  add(uuid: string): void {
+    if (this.set.has(uuid)) return
+    // Evict the entry at the current write position (if occupied)
+    const evicted = this.ring[this.writeIdx]
+    if (evicted !== undefined) {
+      this.set.delete(evicted)
+    }
+    this.ring[this.writeIdx] = uuid
+    this.set.add(uuid)
+    this.writeIdx = (this.writeIdx + 1) % this.capacity
+  }
+
+  has(uuid: string): boolean {
+    return this.set.has(uuid)
+  }
+
+  clear(): void {
+    this.set.clear()
+    this.ring.fill(undefined)
+    this.writeIdx = 0
+  }
+}

+ 43 - 0
src/bridge/bridgePermissionCallbacks.ts

@@ -0,0 +1,43 @@
+import type { PermissionUpdate } from '../utils/permissions/PermissionUpdateSchema.js'
+
/** Decision payload the remote client returns for a permission prompt. */
type BridgePermissionResponse = {
  /** 'allow' or 'deny' — the discriminant checked by isBridgePermissionResponse. */
  behavior: 'allow' | 'deny'
  /** Optional replacement tool input (presumably applied on allow — confirm at call sites). */
  updatedInput?: Record<string, unknown>
  /** Optional permission rule updates carried along with the decision. */
  updatedPermissions?: PermissionUpdate[]
  /** Optional human-readable message accompanying the decision. */
  message?: string
}

/** Plumbing surface for routing permission prompts over the bridge. */
type BridgePermissionCallbacks = {
  /** Forward a permission prompt for a tool use to the remote client. */
  sendRequest(
    requestId: string,
    toolName: string,
    input: Record<string, unknown>,
    toolUseId: string,
    description: string,
    permissionSuggestions?: PermissionUpdate[],
    blockedPath?: string,
  ): void
  /** Deliver a decision for a previously received request. */
  sendResponse(requestId: string, response: BridgePermissionResponse): void
  /** Cancel a pending control_request so the web app can dismiss its prompt. */
  cancelRequest(requestId: string): void
  /** Subscribe to the response for `requestId`. */
  onResponse(
    requestId: string,
    handler: (response: BridgePermissionResponse) => void,
  ): () => void // returns unsubscribe
}
+
+/** Type predicate for validating a parsed control_response payload
+ *  as a BridgePermissionResponse. Checks the required `behavior`
+ *  discriminant rather than using an unsafe `as` cast. */
+function isBridgePermissionResponse(
+  value: unknown,
+): value is BridgePermissionResponse {
+  if (!value || typeof value !== 'object') return false
+  return (
+    'behavior' in value &&
+    (value.behavior === 'allow' || value.behavior === 'deny')
+  )
+}
+
+export { isBridgePermissionResponse }
+export type { BridgePermissionCallbacks, BridgePermissionResponse }

+ 210 - 0
src/bridge/bridgePointer.ts

@@ -0,0 +1,210 @@
+import { mkdir, readFile, stat, unlink, writeFile } from 'fs/promises'
+import { dirname, join } from 'path'
+import { z } from 'zod/v4'
+import { logForDebugging } from '../utils/debug.js'
+import { isENOENT } from '../utils/errors.js'
+import { getWorktreePathsPortable } from '../utils/getWorktreePathsPortable.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import {
+  getProjectsDir,
+  sanitizePath,
+} from '../utils/sessionStoragePortable.js'
+import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
+
/**
 * Upper bound on worktree fanout. git worktree list is naturally bounded
 * (50 is a LOT), but this caps the parallel stat() burst and guards against
 * pathological setups. Above this, --continue falls back to current-dir-only.
 */
const MAX_WORKTREE_FANOUT = 50

/**
 * Crash-recovery pointer for Remote Control sessions.
 *
 * Written immediately after a bridge session is created, periodically
 * refreshed during the session, and cleared on clean shutdown. If the
 * process dies unclean (crash, kill -9, terminal closed), the pointer
 * persists. On next startup, `claude remote-control` detects it and offers
 * to resume via the --session-id flow from #20460.
 *
 * Staleness is checked against the file's mtime (not an embedded timestamp)
 * so that a periodic re-write with the same content serves as a refresh —
 * matches the backend's rolling BRIDGE_LAST_POLL_TTL (4h) semantics. A
 * bridge that's been polling for 5+ hours and then crashes still has a
 * fresh pointer as long as the refresh ran within the window.
 *
 * Scoped per working directory (alongside transcript JSONL files) so two
 * concurrent bridges in different repos don't clobber each other.
 */

/** Pointer staleness TTL: 4 hours in milliseconds (see mtime note above). */
export const BRIDGE_POINTER_TTL_MS = 4 * 60 * 60 * 1000

// lazySchema defers constructing the zod object until first use (see
// utils/lazySchema).
const BridgePointerSchema = lazySchema(() =>
  z.object({
    sessionId: z.string(),
    environmentId: z.string(),
    source: z.enum(['standalone', 'repl']),
  }),
)

/** Shape of the on-disk pointer file, as validated by BridgePointerSchema. */
export type BridgePointer = z.infer<ReturnType<typeof BridgePointerSchema>>
+
/**
 * Absolute path of the crash-recovery pointer file for `dir`, stored in the
 * per-project storage directory under a sanitized directory key.
 */
export function getBridgePointerPath(dir: string): string {
  return join(getProjectsDir(), sanitizePath(dir), 'bridge-pointer.json')
}
+
+/**
+ * Write the pointer. Also used to refresh mtime during long sessions —
+ * calling with the same IDs is a cheap no-content-change write that bumps
+ * the staleness clock. Best-effort — a crash-recovery file must never
+ * itself cause a crash. Logs and swallows on error.
+ */
+export async function writeBridgePointer(
+  dir: string,
+  pointer: BridgePointer,
+): Promise<void> {
+  const path = getBridgePointerPath(dir)
+  try {
+    await mkdir(dirname(path), { recursive: true })
+    await writeFile(path, jsonStringify(pointer), 'utf8')
+    logForDebugging(`[bridge:pointer] wrote ${path}`)
+  } catch (err: unknown) {
+    logForDebugging(`[bridge:pointer] write failed: ${err}`, { level: 'warn' })
+  }
+}
+
+/**
+ * Read the pointer and its age (ms since last write). Operates directly
+ * and handles errors — no existence check (CLAUDE.md TOCTOU rule). Returns
+ * null on any failure: missing file, corrupted JSON, schema mismatch, or
+ * stale (mtime > 4h ago). Stale/invalid pointers are deleted so they don't
+ * keep re-prompting after the backend has already GC'd the env.
+ */
+export async function readBridgePointer(
+  dir: string,
+): Promise<(BridgePointer & { ageMs: number }) | null> {
+  const path = getBridgePointerPath(dir)
+  let raw: string
+  let mtimeMs: number
+  try {
+    // stat for mtime (staleness anchor), then read. Two syscalls, but both
+    // are needed — mtime IS the data we return, not a TOCTOU guard.
+    mtimeMs = (await stat(path)).mtimeMs
+    raw = await readFile(path, 'utf8')
+  } catch {
+    return null
+  }
+
+  const parsed = BridgePointerSchema().safeParse(safeJsonParse(raw))
+  if (!parsed.success) {
+    logForDebugging(`[bridge:pointer] invalid schema, clearing: ${path}`)
+    await clearBridgePointer(dir)
+    return null
+  }
+
+  const ageMs = Math.max(0, Date.now() - mtimeMs)
+  if (ageMs > BRIDGE_POINTER_TTL_MS) {
+    logForDebugging(`[bridge:pointer] stale (>4h mtime), clearing: ${path}`)
+    await clearBridgePointer(dir)
+    return null
+  }
+
+  return { ...parsed.data, ageMs }
+}
+
+/**
+ * Worktree-aware read for `--continue`. The REPL bridge writes its pointer
+ * to `getOriginalCwd()` which EnterWorktreeTool/activeWorktreeSession can
+ * mutate to a worktree path — but `claude remote-control --continue` runs
+ * with `resolve('.')` = shell CWD. This fans out across git worktree
+ * siblings to find the freshest pointer, matching /resume's semantics.
+ *
+ * Fast path: checks `dir` first. Only shells out to `git worktree list` if
+ * that misses — the common case (pointer in launch dir) is one stat, zero
+ * exec. Fanout reads run in parallel; capped at MAX_WORKTREE_FANOUT.
+ *
+ * Returns the pointer AND the dir it was found in, so the caller can clear
+ * the right file on resume failure.
+ */
+export async function readBridgePointerAcrossWorktrees(
+  dir: string,
+): Promise<{ pointer: BridgePointer & { ageMs: number }; dir: string } | null> {
+  // Fast path: current dir. Covers standalone bridge (always matches) and
+  // REPL bridge when no worktree mutation happened.
+  const here = await readBridgePointer(dir)
+  if (here) {
+    return { pointer: here, dir }
+  }
+
+  // Fanout: scan worktree siblings. getWorktreePathsPortable has a 5s
+  // timeout and returns [] on any error (not a git repo, git not installed).
+  const worktrees = await getWorktreePathsPortable(dir)
+  if (worktrees.length <= 1) return null
+  if (worktrees.length > MAX_WORKTREE_FANOUT) {
+    logForDebugging(
+      `[bridge:pointer] ${worktrees.length} worktrees exceeds fanout cap ${MAX_WORKTREE_FANOUT}, skipping`,
+    )
+    return null
+  }
+
+  // Dedupe against `dir` so we don't re-stat it. sanitizePath normalizes
+  // case/separators so worktree-list output matches our fast-path key even
+  // on Windows where git may emit C:/ vs stored c:/.
+  const dirKey = sanitizePath(dir)
+  const candidates = worktrees.filter(wt => sanitizePath(wt) !== dirKey)
+
+  // Parallel stat+read. Each readBridgePointer is a stat() that ENOENTs
+  // for worktrees with no pointer (cheap) plus a ~100-byte read for the
+  // rare ones that have one. Promise.all → latency ≈ slowest single stat.
+  const results = await Promise.all(
+    candidates.map(async wt => {
+      const p = await readBridgePointer(wt)
+      return p ? { pointer: p, dir: wt } : null
+    }),
+  )
+
+  // Pick freshest (lowest ageMs). The pointer stores environmentId so
+  // resume reconnects to the right env regardless of which worktree
+  // --continue was invoked from.
+  let freshest: {
+    pointer: BridgePointer & { ageMs: number }
+    dir: string
+  } | null = null
+  for (const r of results) {
+    if (r && (!freshest || r.pointer.ageMs < freshest.pointer.ageMs)) {
+      freshest = r
+    }
+  }
+  if (freshest) {
+    logForDebugging(
+      `[bridge:pointer] fanout found pointer in worktree ${freshest.dir} (ageMs=${freshest.pointer.ageMs})`,
+    )
+  }
+  return freshest
+}
+
+/**
+ * Delete the pointer. Idempotent — ENOENT is expected when the process
+ * shut down clean previously.
+ */
+export async function clearBridgePointer(dir: string): Promise<void> {
+  const path = getBridgePointerPath(dir)
+  try {
+    await unlink(path)
+    logForDebugging(`[bridge:pointer] cleared ${path}`)
+  } catch (err: unknown) {
+    if (!isENOENT(err)) {
+      logForDebugging(`[bridge:pointer] clear failed: ${err}`, {
+        level: 'warn',
+      })
+    }
+  }
+}
+
+function safeJsonParse(raw: string): unknown {
+  try {
+    return jsonParse(raw)
+  } catch {
+    return null
+  }
+}

+ 163 - 0
src/bridge/bridgeStatusUtil.ts

@@ -0,0 +1,163 @@
+import {
+  getClaudeAiBaseUrl,
+  getRemoteSessionUrl,
+} from '../constants/product.js'
+import { stringWidth } from '../ink/stringWidth.js'
+import { formatDuration, truncateToWidth } from '../utils/format.js'
+import { getGraphemeSegmenter } from '../utils/intl.js'
+
/**
 * Bridge status state machine states.
 * 'idle' and 'attached' drive the Ready/Connected renders in bridgeUI.ts;
 * 'reconnecting' and 'failed' bypass the normal status renderer there.
 * 'titled' presumably marks an attached session that has received a title —
 * NOTE(review): confirm against the consumer.
 */
export type StatusState =
  | 'idle'
  | 'attached'
  | 'titled'
  | 'reconnecting'
  | 'failed'

/** How long a tool activity line stays visible after last tool_start (ms). */
export const TOOL_DISPLAY_EXPIRY_MS = 30_000

/** Interval for the shimmer animation tick (ms). */
export const SHIMMER_INTERVAL_MS = 150
+
+export function timestamp(): string {
+  const now = new Date()
+  const h = String(now.getHours()).padStart(2, '0')
+  const m = String(now.getMinutes()).padStart(2, '0')
+  const s = String(now.getSeconds()).padStart(2, '0')
+  return `${h}:${m}:${s}`
+}
+
// Re-export shared formatting helpers under the names bridge modules use
// (`truncatePrompt` is the bridge-local alias for truncateToWidth).
export { formatDuration, truncateToWidth as truncatePrompt }

/** Abbreviate a tool activity summary for the trail display (width 30). */
export function abbreviateActivity(summary: string): string {
  return truncateToWidth(summary, 30)
}
+
+/** Build the connect URL shown when the bridge is idle. */
+export function buildBridgeConnectUrl(
+  environmentId: string,
+  ingressUrl?: string,
+): string {
+  const baseUrl = getClaudeAiBaseUrl(undefined, ingressUrl)
+  return `${baseUrl}/code?bridge=${environmentId}`
+}
+
+/**
+ * Build the session URL shown when a session is attached. Delegates to
+ * getRemoteSessionUrl for the cse_→session_ prefix translation, then appends
+ * the v1-specific ?bridge={environmentId} query.
+ */
+export function buildBridgeSessionUrl(
+  sessionId: string,
+  environmentId: string,
+  ingressUrl?: string,
+): string {
+  return `${getRemoteSessionUrl(sessionId, ingressUrl)}?bridge=${environmentId}`
+}
+
+/** Compute the glimmer index for a reverse-sweep shimmer animation. */
+export function computeGlimmerIndex(
+  tick: number,
+  messageWidth: number,
+): number {
+  const cycleLength = messageWidth + 20
+  return messageWidth + 10 - (tick % cycleLength)
+}
+
+/**
+ * Split text into three segments by visual column position for shimmer rendering.
+ *
+ * Uses grapheme segmentation and `stringWidth` so the split is correct for
+ * multi-byte characters, emoji, and CJK glyphs.
+ *
+ * Returns `{ before, shimmer, after }` strings. Both renderers (chalk in
+ * bridgeUI.ts and React/Ink in bridge.tsx) apply their own coloring to
+ * these segments.
+ */
+export function computeShimmerSegments(
+  text: string,
+  glimmerIndex: number,
+): { before: string; shimmer: string; after: string } {
+  const messageWidth = stringWidth(text)
+  const shimmerStart = glimmerIndex - 1
+  const shimmerEnd = glimmerIndex + 1
+
+  // When shimmer is offscreen, return all text as "before"
+  if (shimmerStart >= messageWidth || shimmerEnd < 0) {
+    return { before: text, shimmer: '', after: '' }
+  }
+
+  // Split into at most 3 segments by visual column position
+  const clampedStart = Math.max(0, shimmerStart)
+  let colPos = 0
+  let before = ''
+  let shimmer = ''
+  let after = ''
+  for (const { segment } of getGraphemeSegmenter().segment(text)) {
+    const segWidth = stringWidth(segment)
+    if (colPos + segWidth <= clampedStart) {
+      before += segment
+    } else if (colPos > shimmerEnd) {
+      after += segment
+    } else {
+      shimmer += segment
+    }
+    colPos += segWidth
+  }
+
+  return { before, shimmer, after }
+}
+
/**
 * Computed bridge status label and color from connection state.
 * The label union is closed so UI code can match on exact strings;
 * `color` is a styling key — see consumers for how it maps to rendering.
 */
export type BridgeStatusInfo = {
  label:
    | 'Remote Control failed'
    | 'Remote Control reconnecting'
    | 'Remote Control active'
    | 'Remote Control connecting\u2026'
  color: 'error' | 'warning' | 'success'
}
+
+/** Derive a status label and color from the bridge connection state. */
+export function getBridgeStatus({
+  error,
+  connected,
+  sessionActive,
+  reconnecting,
+}: {
+  error: string | undefined
+  connected: boolean
+  sessionActive: boolean
+  reconnecting: boolean
+}): BridgeStatusInfo {
+  if (error) return { label: 'Remote Control failed', color: 'error' }
+  if (reconnecting)
+    return { label: 'Remote Control reconnecting', color: 'warning' }
+  if (sessionActive || connected)
+    return { label: 'Remote Control active', color: 'success' }
+  return { label: 'Remote Control connecting\u2026', color: 'warning' }
+}
+
+/** Footer text shown when bridge is idle (Ready state). */
+export function buildIdleFooterText(url: string): string {
+  return `Code everywhere with the Claude app or ${url}`
+}
+
+/** Footer text shown when a session is active (Connected state). */
+export function buildActiveFooterText(url: string): string {
+  return `Continue coding in the Claude app or ${url}`
+}
+
/** Footer text shown when the bridge has failed. Static — unlike the
 *  idle/active footers it embeds no URL. */
export const FAILED_FOOTER_TEXT = 'Something went wrong, please try again'
+
+/**
+ * Wrap text in an OSC 8 terminal hyperlink. Zero visual width for layout purposes.
+ * strip-ansi (used by stringWidth) correctly strips these sequences, so
+ * countVisualLines in bridgeUI.ts remains accurate.
+ */
+export function wrapWithOsc8Link(text: string, url: string): string {
+  return `\x1b]8;;${url}\x07${text}\x1b]8;;\x07`
+}

+ 530 - 0
src/bridge/bridgeUI.ts

@@ -0,0 +1,530 @@
+import chalk from 'chalk'
+import { toString as qrToString } from 'qrcode'
+import {
+  BRIDGE_FAILED_INDICATOR,
+  BRIDGE_READY_INDICATOR,
+  BRIDGE_SPINNER_FRAMES,
+} from '../constants/figures.js'
+import { stringWidth } from '../ink/stringWidth.js'
+import { logForDebugging } from '../utils/debug.js'
+import {
+  buildActiveFooterText,
+  buildBridgeConnectUrl,
+  buildBridgeSessionUrl,
+  buildIdleFooterText,
+  FAILED_FOOTER_TEXT,
+  formatDuration,
+  type StatusState,
+  TOOL_DISPLAY_EXPIRY_MS,
+  timestamp,
+  truncatePrompt,
+  wrapWithOsc8Link,
+} from './bridgeStatusUtil.js'
+import type {
+  BridgeConfig,
+  BridgeLogger,
+  SessionActivity,
+  SpawnMode,
+} from './types.js'
+
/**
 * Rendering options for qrcode's toString: UTF-8 terminal output, lowest
 * error-correction level ('L'), and small mode — keeping the rendered code
 * compact so it fits in fewer terminal rows.
 */
const QR_OPTIONS = {
  type: 'utf8' as const,
  errorCorrectionLevel: 'L' as const,
  small: true,
}
+/** Generate a QR code and return its lines. */
+async function generateQr(url: string): Promise<string[]> {
+  const qr = await qrToString(url, QR_OPTIONS)
+  return qr.split('\n').filter((line: string) => line.length > 0)
+}
+
+export function createBridgeLogger(options: {
+  verbose: boolean
+  write?: (s: string) => void
+}): BridgeLogger {
+  const write = options.write ?? ((s: string) => process.stdout.write(s))
+  const verbose = options.verbose
+
+  // Track how many status lines are currently displayed at the bottom
+  let statusLineCount = 0
+
+  // Status state machine
+  let currentState: StatusState = 'idle'
+  let currentStateText = 'Ready'
+  let repoName = ''
+  let branch = ''
+  let debugLogPath = ''
+
+  // Connect URL (built in printBanner with correct base for staging/prod)
+  let connectUrl = ''
+  let cachedIngressUrl = ''
+  let cachedEnvironmentId = ''
+  let activeSessionUrl: string | null = null
+
+  // QR code lines for the current URL
+  let qrLines: string[] = []
+  let qrVisible = false
+
+  // Tool activity for the second status line
+  let lastToolSummary: string | null = null
+  let lastToolTime = 0
+
+  // Session count indicator (shown when multi-session mode is enabled)
+  let sessionActive = 0
+  let sessionMax = 1
+  // Spawn mode shown in the session-count line + gates the `w` hint
+  let spawnModeDisplay: 'same-dir' | 'worktree' | null = null
+  let spawnMode: SpawnMode = 'single-session'
+
+  // Per-session display info for the multi-session bullet list (keyed by compat sessionId)
+  const sessionDisplayInfo = new Map<
+    string,
+    { title?: string; url: string; activity?: SessionActivity }
+  >()
+
+  // Connecting spinner state
+  let connectingTimer: ReturnType<typeof setInterval> | null = null
+  let connectingTick = 0
+
+  /**
+   * Count how many visual terminal rows a string occupies, accounting for
+   * line wrapping. Each `\n` is one row, and content wider than the terminal
+   * wraps to additional rows.
+   */
+  function countVisualLines(text: string): number {
+    // eslint-disable-next-line custom-rules/prefer-use-terminal-size
+    const cols = process.stdout.columns || 80 // non-React CLI context
+    let count = 0
+    // Split on newlines to get logical lines
+    for (const logical of text.split('\n')) {
+      if (logical.length === 0) {
+        // Empty segment between consecutive \n — counts as 1 row
+        count++
+        continue
+      }
+      const width = stringWidth(logical)
+      count += Math.max(1, Math.ceil(width / cols))
+    }
+    // The trailing \n in "line\n" produces an empty last element — don't count it
+    // because the cursor sits at the start of the next line, not a new visual row.
+    if (text.endsWith('\n')) {
+      count--
+    }
+    return count
+  }
+
+  /** Write a status line and track its visual line count. */
+  function writeStatus(text: string): void {
+    write(text)
+    statusLineCount += countVisualLines(text)
+  }
+
+  /** Clear any currently displayed status lines. */
+  function clearStatusLines(): void {
+    if (statusLineCount <= 0) return
+    logForDebugging(`[bridge:ui] clearStatusLines count=${statusLineCount}`)
+    // Move cursor up to the start of the status block, then erase everything below
+    write(`\x1b[${statusLineCount}A`) // cursor up N lines
+    write('\x1b[J') // erase from cursor to end of screen
+    statusLineCount = 0
+  }
+
+  /** Print a permanent log line, clearing status first and restoring after. */
+  function printLog(line: string): void {
+    clearStatusLines()
+    write(line)
+  }
+
+  /** Regenerate the QR code with the given URL. */
+  function regenerateQr(url: string): void {
+    generateQr(url)
+      .then(lines => {
+        qrLines = lines
+        renderStatusLine()
+      })
+      .catch(e => {
+        logForDebugging(`QR code generation failed: ${e}`, { level: 'error' })
+      })
+  }
+
+  /** Render the connecting spinner line (shown before first updateIdleStatus). */
+  function renderConnectingLine(): void {
+    clearStatusLines()
+
+    const frame =
+      BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
+    let suffix = ''
+    if (repoName) {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
+    }
+    if (branch) {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
+    }
+    writeStatus(
+      `${chalk.yellow(frame)} ${chalk.yellow('Connecting')}${suffix}\n`,
+    )
+  }
+
+  /** Start the connecting spinner. Stopped by first updateIdleStatus(). */
+  function startConnecting(): void {
+    stopConnecting()
+    renderConnectingLine()
+    connectingTimer = setInterval(() => {
+      connectingTick++
+      renderConnectingLine()
+    }, 150)
+  }
+
+  /** Stop the connecting spinner. */
+  function stopConnecting(): void {
+    if (connectingTimer) {
+      clearInterval(connectingTimer)
+      connectingTimer = null
+    }
+  }
+
+  /** Render and write the current status lines based on state. */
+  function renderStatusLine(): void {
+    if (currentState === 'reconnecting' || currentState === 'failed') {
+      // These states are handled separately (updateReconnectingStatus /
+      // updateFailedStatus). Return before clearing so callers like toggleQr
+      // and setSpawnModeDisplay don't blank the display during these states.
+      return
+    }
+
+    clearStatusLines()
+
+    const isIdle = currentState === 'idle'
+
+    // QR code above the status line
+    if (qrVisible) {
+      for (const line of qrLines) {
+        writeStatus(`${chalk.dim(line)}\n`)
+      }
+    }
+
+    // Determine indicator and colors based on state
+    const indicator = BRIDGE_READY_INDICATOR
+    const indicatorColor = isIdle ? chalk.green : chalk.cyan
+    const baseColor = isIdle ? chalk.green : chalk.cyan
+    const stateText = baseColor(currentStateText)
+
+    // Build the suffix with repo and branch
+    let suffix = ''
+    if (repoName) {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
+    }
+    // In worktree mode each session gets its own branch, so showing the
+    // bridge's branch would be misleading.
+    if (branch && spawnMode !== 'worktree') {
+      suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
+    }
+
+    if (process.env.USER_TYPE === 'ant' && debugLogPath) {
+      writeStatus(
+        `${chalk.yellow('[ANT-ONLY] Logs:')} ${chalk.dim(debugLogPath)}\n`,
+      )
+    }
+    writeStatus(`${indicatorColor(indicator)} ${stateText}${suffix}\n`)
+
+    // Session count and per-session list (multi-session mode only)
+    if (sessionMax > 1) {
+      const modeHint =
+        spawnMode === 'worktree'
+          ? 'New sessions will be created in an isolated worktree'
+          : 'New sessions will be created in the current directory'
+      writeStatus(
+        `    ${chalk.dim(`Capacity: ${sessionActive}/${sessionMax} \u00b7 ${modeHint}`)}\n`,
+      )
+      for (const [, info] of sessionDisplayInfo) {
+        const titleText = info.title
+          ? truncatePrompt(info.title, 35)
+          : chalk.dim('Attached')
+        const titleLinked = wrapWithOsc8Link(titleText, info.url)
+        const act = info.activity
+        const showAct = act && act.type !== 'result' && act.type !== 'error'
+        const actText = showAct
+          ? chalk.dim(` ${truncatePrompt(act.summary, 40)}`)
+          : ''
+        writeStatus(`    ${titleLinked}${actText}
+`)
+      }
+    }
+
+    // Mode line for spawn modes with a single slot (or true single-session mode)
+    if (sessionMax === 1) {
+      const modeText =
+        spawnMode === 'single-session'
+          ? 'Single session \u00b7 exits when complete'
+          : spawnMode === 'worktree'
+            ? `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in an isolated worktree`
+            : `Capacity: ${sessionActive}/1 \u00b7 New sessions will be created in the current directory`
+      writeStatus(`    ${chalk.dim(modeText)}\n`)
+    }
+
+    // Tool activity line for single-session mode
+    if (
+      sessionMax === 1 &&
+      !isIdle &&
+      lastToolSummary &&
+      Date.now() - lastToolTime < TOOL_DISPLAY_EXPIRY_MS
+    ) {
+      writeStatus(`  ${chalk.dim(truncatePrompt(lastToolSummary, 60))}\n`)
+    }
+
+    // Blank line separator before footer
+    const url = activeSessionUrl ?? connectUrl
+    if (url) {
+      writeStatus('\n')
+      const footerText = isIdle
+        ? buildIdleFooterText(url)
+        : buildActiveFooterText(url)
+      const qrHint = qrVisible
+        ? chalk.dim.italic('space to hide QR code')
+        : chalk.dim.italic('space to show QR code')
+      const toggleHint = spawnModeDisplay
+        ? chalk.dim.italic(' \u00b7 w to toggle spawn mode')
+        : ''
+      writeStatus(`${chalk.dim(footerText)}\n`)
+      writeStatus(`${qrHint}${toggleHint}\n`)
+    }
+  }
+
+  return {
+    printBanner(config: BridgeConfig, environmentId: string): void {
+      cachedIngressUrl = config.sessionIngressUrl
+      cachedEnvironmentId = environmentId
+      connectUrl = buildBridgeConnectUrl(environmentId, cachedIngressUrl)
+      regenerateQr(connectUrl)
+
+      if (verbose) {
+        write(chalk.dim(`Remote Control`) + ` v${MACRO.VERSION}\n`)
+      }
+      if (verbose) {
+        if (config.spawnMode !== 'single-session') {
+          write(chalk.dim(`Spawn mode: `) + `${config.spawnMode}\n`)
+          write(
+            chalk.dim(`Max concurrent sessions: `) + `${config.maxSessions}\n`,
+          )
+        }
+        write(chalk.dim(`Environment ID: `) + `${environmentId}\n`)
+      }
+      if (config.sandbox) {
+        write(chalk.dim(`Sandbox: `) + `${chalk.green('Enabled')}\n`)
+      }
+      write('\n')
+
+      // Start connecting spinner — first updateIdleStatus() will stop it
+      startConnecting()
+    },
+
+    logSessionStart(sessionId: string, prompt: string): void {
+      if (verbose) {
+        const short = truncatePrompt(prompt, 80)
+        printLog(
+          chalk.dim(`[${timestamp()}]`) +
+            ` Session started: ${chalk.white(`"${short}"`)} (${chalk.dim(sessionId)})\n`,
+        )
+      }
+    },
+
+    logSessionComplete(sessionId: string, durationMs: number): void {
+      printLog(
+        chalk.dim(`[${timestamp()}]`) +
+          ` Session ${chalk.green('completed')} (${formatDuration(durationMs)}) ${chalk.dim(sessionId)}\n`,
+      )
+    },
+
+    logSessionFailed(sessionId: string, error: string): void {
+      printLog(
+        chalk.dim(`[${timestamp()}]`) +
+          ` Session ${chalk.red('failed')}: ${error} ${chalk.dim(sessionId)}\n`,
+      )
+    },
+
+    logStatus(message: string): void {
+      printLog(chalk.dim(`[${timestamp()}]`) + ` ${message}\n`)
+    },
+
+    logVerbose(message: string): void {
+      if (verbose) {
+        printLog(chalk.dim(`[${timestamp()}] ${message}`) + '\n')
+      }
+    },
+
+    logError(message: string): void {
+      printLog(chalk.red(`[${timestamp()}] Error: ${message}`) + '\n')
+    },
+
+    logReconnected(disconnectedMs: number): void {
+      printLog(
+        chalk.dim(`[${timestamp()}]`) +
+          ` ${chalk.green('Reconnected')} after ${formatDuration(disconnectedMs)}\n`,
+      )
+    },
+
+    setRepoInfo(repo: string, branchName: string): void {
+      repoName = repo
+      branch = branchName
+    },
+
+    setDebugLogPath(path: string): void {
+      debugLogPath = path
+    },
+
+    updateIdleStatus(): void {
+      stopConnecting()
+
+      currentState = 'idle'
+      currentStateText = 'Ready'
+      lastToolSummary = null
+      lastToolTime = 0
+      activeSessionUrl = null
+      regenerateQr(connectUrl)
+      renderStatusLine()
+    },
+
+    setAttached(sessionId: string): void {
+      stopConnecting()
+      currentState = 'attached'
+      currentStateText = 'Connected'
+      lastToolSummary = null
+      lastToolTime = 0
+      // Multi-session: keep footer/QR on the environment connect URL so users
+      // can spawn more sessions. Per-session links are in the bullet list.
+      if (sessionMax <= 1) {
+        activeSessionUrl = buildBridgeSessionUrl(
+          sessionId,
+          cachedEnvironmentId,
+          cachedIngressUrl,
+        )
+        regenerateQr(activeSessionUrl)
+      }
+      renderStatusLine()
+    },
+
+    updateReconnectingStatus(delayStr: string, elapsedStr: string): void {
+      stopConnecting()
+      clearStatusLines()
+      currentState = 'reconnecting'
+
+      // QR code above the status line
+      if (qrVisible) {
+        for (const line of qrLines) {
+          writeStatus(`${chalk.dim(line)}\n`)
+        }
+      }
+
+      const frame =
+        BRIDGE_SPINNER_FRAMES[connectingTick % BRIDGE_SPINNER_FRAMES.length]!
+      connectingTick++
+      writeStatus(
+        `${chalk.yellow(frame)} ${chalk.yellow('Reconnecting')} ${chalk.dim('\u00b7')} ${chalk.dim(`retrying in ${delayStr}`)} ${chalk.dim('\u00b7')} ${chalk.dim(`disconnected ${elapsedStr}`)}\n`,
+      )
+    },
+
+    updateFailedStatus(error: string): void {
+      stopConnecting()
+      clearStatusLines()
+      currentState = 'failed'
+
+      let suffix = ''
+      if (repoName) {
+        suffix += chalk.dim(' \u00b7 ') + chalk.dim(repoName)
+      }
+      if (branch) {
+        suffix += chalk.dim(' \u00b7 ') + chalk.dim(branch)
+      }
+
+      writeStatus(
+        `${chalk.red(BRIDGE_FAILED_INDICATOR)} ${chalk.red('Remote Control Failed')}${suffix}\n`,
+      )
+      writeStatus(`${chalk.dim(FAILED_FOOTER_TEXT)}\n`)
+
+      if (error) {
+        writeStatus(`${chalk.red(error)}\n`)
+      }
+    },
+
+    updateSessionStatus(
+      _sessionId: string,
+      _elapsed: string,
+      activity: SessionActivity,
+      _trail: string[],
+    ): void {
+      // Cache tool activity for the second status line
+      if (activity.type === 'tool_start') {
+        lastToolSummary = activity.summary
+        lastToolTime = Date.now()
+      }
+      renderStatusLine()
+    },
+
+    clearStatus(): void {
+      stopConnecting()
+      clearStatusLines()
+    },
+
+    toggleQr(): void {
+      qrVisible = !qrVisible
+      renderStatusLine()
+    },
+
+    updateSessionCount(active: number, max: number, mode: SpawnMode): void {
+      if (sessionActive === active && sessionMax === max && spawnMode === mode)
+        return
+      sessionActive = active
+      sessionMax = max
+      spawnMode = mode
+      // Don't re-render here — the status ticker calls renderStatusLine
+      // on its own cadence, and the next tick will pick up the new values.
+    },
+
+    setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void {
+      if (spawnModeDisplay === mode) return
+      spawnModeDisplay = mode
+      // Also sync the #21118-added spawnMode so the next render shows correct
+      // mode hint + branch visibility. Don't render here — matches
+      // updateSessionCount: called before printBanner (initial setup) and
+      // again from the `w` handler (which follows with refreshDisplay).
+      if (mode) spawnMode = mode
+    },
+
+    addSession(sessionId: string, url: string): void {
+      sessionDisplayInfo.set(sessionId, { url })
+    },
+
+    updateSessionActivity(sessionId: string, activity: SessionActivity): void {
+      const info = sessionDisplayInfo.get(sessionId)
+      if (!info) return
+      info.activity = activity
+    },
+
+    setSessionTitle(sessionId: string, title: string): void {
+      const info = sessionDisplayInfo.get(sessionId)
+      if (!info) return
+      info.title = title
+      // Guard against reconnecting/failed — renderStatusLine clears then returns
+      // early for those states, which would erase the spinner/error.
+      if (currentState === 'reconnecting' || currentState === 'failed') return
+      if (sessionMax === 1) {
+        // Single-session: show title in the main status line too.
+        currentState = 'titled'
+        currentStateText = truncatePrompt(title, 40)
+      }
+      renderStatusLine()
+    },
+
+    removeSession(sessionId: string): void {
+      sessionDisplayInfo.delete(sessionId)
+    },
+
+    refreshDisplay(): void {
+      // Skip during reconnecting/failed — renderStatusLine clears then returns
+      // early for those states, which would erase the spinner/error.
+      if (currentState === 'reconnecting' || currentState === 'failed') return
+      renderStatusLine()
+    },
+  }
+}

+ 56 - 0
src/bridge/capacityWake.ts

@@ -0,0 +1,56 @@
+/**
+ * Shared capacity-wake primitive for bridge poll loops.
+ *
+ * Both replBridge.ts and bridgeMain.ts need to sleep while "at capacity"
+ * but wake early when either (a) the outer loop signal aborts (shutdown),
+ * or (b) capacity frees up (session done / transport lost). This module
+ * encapsulates the mutable wake-controller + two-signal merger that both
+ * poll loops previously duplicated byte-for-byte.
+ */
+
+export type CapacitySignal = { signal: AbortSignal; cleanup: () => void }
+
+export type CapacityWake = {
+  /**
+   * Create a signal that aborts when either the outer loop signal or the
+   * capacity-wake controller fires. Returns the merged signal and a cleanup
+   * function that removes listeners when the sleep resolves normally
+   * (without abort).
+   */
+  signal(): CapacitySignal
+  /**
+   * Abort the current at-capacity sleep and arm a fresh controller so the
+   * poll loop immediately re-checks for new work.
+   */
+  wake(): void
+}
+
+export function createCapacityWake(outerSignal: AbortSignal): CapacityWake {
+  let wakeController = new AbortController()
+
+  function wake(): void {
+    wakeController.abort()
+    wakeController = new AbortController()
+  }
+
+  function signal(): CapacitySignal {
+    const merged = new AbortController()
+    const abort = (): void => merged.abort()
+    if (outerSignal.aborted || wakeController.signal.aborted) {
+      merged.abort()
+      return { signal: merged.signal, cleanup: () => {} }
+    }
+    outerSignal.addEventListener('abort', abort, { once: true })
+    const capSig = wakeController.signal
+    capSig.addEventListener('abort', abort, { once: true })
+    return {
+      signal: merged.signal,
+      cleanup: () => {
+        outerSignal.removeEventListener('abort', abort)
+        capSig.removeEventListener('abort', abort)
+      },
+    }
+  }
+
+  return { signal, wake }
+}

+ 168 - 0
src/bridge/codeSessionApi.ts

@@ -0,0 +1,168 @@
+/**
+ * Thin HTTP wrappers for the CCR v2 code-session API.
+ *
+ * Separate file from remoteBridgeCore.ts so the SDK /bridge subpath can
+ * export createCodeSession + fetchRemoteCredentials without bundling the
+ * heavy CLI tree (analytics, transport, etc.). Callers supply explicit
+ * accessToken + baseUrl — no implicit auth or config reads.
+ */
+
+import axios from 'axios'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+import { extractErrorDetail } from './debugUtils.js'
+
+const ANTHROPIC_VERSION = '2023-06-01'
+
+function oauthHeaders(accessToken: string): Record<string, string> {
+  return {
+    Authorization: `Bearer ${accessToken}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': ANTHROPIC_VERSION,
+  }
+}
+
+export async function createCodeSession(
+  baseUrl: string,
+  accessToken: string,
+  title: string,
+  timeoutMs: number,
+  tags?: string[],
+): Promise<string | null> {
+  const url = `${baseUrl}/v1/code/sessions`
+  let response
+  try {
+    response = await axios.post(
+      url,
+      // bridge: {} is the positive signal for the oneof runner — omitting it
+      // (or sending environment_id: "") now 400s. BridgeRunner is an empty
+      // message today; it's a placeholder for future bridge-specific options.
+      { title, bridge: {}, ...(tags?.length ? { tags } : {}) },
+      {
+        headers: oauthHeaders(accessToken),
+        timeout: timeoutMs,
+        validateStatus: s => s < 500,
+      },
+    )
+  } catch (err: unknown) {
+    logForDebugging(
+      `[code-session] Session create request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+
+  if (response.status !== 200 && response.status !== 201) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[code-session] Session create failed ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  const data: unknown = response.data
+  if (
+    !data ||
+    typeof data !== 'object' ||
+    !('session' in data) ||
+    !data.session ||
+    typeof data.session !== 'object' ||
+    !('id' in data.session) ||
+    typeof data.session.id !== 'string' ||
+    !data.session.id.startsWith('cse_')
+  ) {
+    logForDebugging(
+      `[code-session] No session.id (cse_*) in response: ${jsonStringify(data).slice(0, 200)}`,
+    )
+    return null
+  }
+  return data.session.id
+}
+
/**
 * Credentials from POST /bridge. JWT is opaque — do not decode.
 * Each /bridge call bumps worker_epoch server-side (it IS the register).
 */
export type RemoteCredentials = {
  // Opaque bearer token for the worker; validated only as a string.
  worker_jwt: string
  // Base URL the worker should use for subsequent API calls.
  api_base_url: string
  // Lifetime of worker_jwt (presumably seconds — confirm against server).
  expires_in: number
  // Registration counter; bumped by every /bridge call (see header comment).
  worker_epoch: number
}
+
+export async function fetchRemoteCredentials(
+  sessionId: string,
+  baseUrl: string,
+  accessToken: string,
+  timeoutMs: number,
+  trustedDeviceToken?: string,
+): Promise<RemoteCredentials | null> {
+  const url = `${baseUrl}/v1/code/sessions/${sessionId}/bridge`
+  const headers = oauthHeaders(accessToken)
+  if (trustedDeviceToken) {
+    headers['X-Trusted-Device-Token'] = trustedDeviceToken
+  }
+  let response
+  try {
+    response = await axios.post(
+      url,
+      {},
+      {
+        headers,
+        timeout: timeoutMs,
+        validateStatus: s => s < 500,
+      },
+    )
+  } catch (err: unknown) {
+    logForDebugging(
+      `[code-session] /bridge request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+
+  if (response.status !== 200) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[code-session] /bridge failed ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  const data: unknown = response.data
+  if (
+    data === null ||
+    typeof data !== 'object' ||
+    !('worker_jwt' in data) ||
+    typeof data.worker_jwt !== 'string' ||
+    !('expires_in' in data) ||
+    typeof data.expires_in !== 'number' ||
+    !('api_base_url' in data) ||
+    typeof data.api_base_url !== 'string' ||
+    !('worker_epoch' in data)
+  ) {
+    logForDebugging(
+      `[code-session] /bridge response malformed (need worker_jwt, expires_in, api_base_url, worker_epoch): ${jsonStringify(data).slice(0, 200)}`,
+    )
+    return null
+  }
+  // protojson serializes int64 as a string to avoid JS precision loss;
+  // Go may also return a number depending on encoder settings.
+  const rawEpoch = data.worker_epoch
+  const epoch = typeof rawEpoch === 'string' ? Number(rawEpoch) : rawEpoch
+  if (
+    typeof epoch !== 'number' ||
+    !Number.isFinite(epoch) ||
+    !Number.isSafeInteger(epoch)
+  ) {
+    logForDebugging(
+      `[code-session] /bridge worker_epoch invalid: ${jsonStringify(rawEpoch)}`,
+    )
+    return null
+  }
+  return {
+    worker_jwt: data.worker_jwt,
+    api_base_url: data.api_base_url,
+    expires_in: data.expires_in,
+    worker_epoch: epoch,
+  }
+}

+ 384 - 0
src/bridge/createSession.ts

@@ -0,0 +1,384 @@
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { extractErrorDetail } from './debugUtils.js'
+import { toCompatSessionId } from './sessionIdCompat.js'
+
// Source-context entry for POST /v1/sessions: where the session's code
// comes from. `url` is a normalized https URL; `revision` is omitted when
// no branch (or default branch) could be resolved.
type GitSource = {
  type: 'git_repository'
  url: string
  revision?: string
}

// Outcome-context entry: where the session's work should land. `repo` is
// "owner/name"; `branches` carries the claude/<branch> target.
type GitOutcome = {
  type: 'git_repository'
  git_info: { type: 'github'; repo: string; branches: string[] }
}

// Events must be wrapped in { type: 'event', data: <sdk_message> } for the
// POST /v1/sessions endpoint (discriminated union format).
type SessionEvent = {
  type: 'event'
  data: SDKMessage
}
+
+/**
+ * Create a session on a bridge environment via POST /v1/sessions.
+ *
+ * Used by both `claude remote-control` (empty session so the user has somewhere to
+ * type immediately) and `/remote-control` (session pre-populated with conversation
+ * history).
+ *
+ * Returns the session ID on success, or null if creation fails (non-fatal).
+ */
+export async function createBridgeSession({
+  environmentId,
+  title,
+  events,
+  gitRepoUrl,
+  branch,
+  signal,
+  baseUrl: baseUrlOverride,
+  getAccessToken,
+  permissionMode,
+}: {
+  environmentId: string
+  title?: string
+  events: SessionEvent[]
+  gitRepoUrl: string | null
+  branch: string
+  signal: AbortSignal
+  baseUrl?: string
+  getAccessToken?: () => string | undefined
+  permissionMode?: string
+}): Promise<string | null> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { parseGitHubRepository } = await import('../utils/detectRepository.js')
+  const { getDefaultBranch } = await import('../utils/git.js')
+  const { getMainLoopModel } = await import('../utils/model/model.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session creation')
+    return null
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session creation')
+    return null
+  }
+
+  // Build git source and outcome context
+  let gitSource: GitSource | null = null
+  let gitOutcome: GitOutcome | null = null
+
+  if (gitRepoUrl) {
+    const { parseGitRemote } = await import('../utils/detectRepository.js')
+    const parsed = parseGitRemote(gitRepoUrl)
+    if (parsed) {
+      const { host, owner, name } = parsed
+      const revision = branch || (await getDefaultBranch()) || undefined
+      gitSource = {
+        type: 'git_repository',
+        url: `https://${host}/${owner}/${name}`,
+        revision,
+      }
+      gitOutcome = {
+        type: 'git_repository',
+        git_info: {
+          type: 'github',
+          repo: `${owner}/${name}`,
+          branches: [`claude/${branch || 'task'}`],
+        },
+      }
+    } else {
+      // Fallback: try parseGitHubRepository for owner/repo format
+      const ownerRepo = parseGitHubRepository(gitRepoUrl)
+      if (ownerRepo) {
+        const [owner, name] = ownerRepo.split('/')
+        if (owner && name) {
+          const revision = branch || (await getDefaultBranch()) || undefined
+          gitSource = {
+            type: 'git_repository',
+            url: `https://github.com/${owner}/${name}`,
+            revision,
+          }
+          gitOutcome = {
+            type: 'git_repository',
+            git_info: {
+              type: 'github',
+              repo: `${owner}/${name}`,
+              branches: [`claude/${branch || 'task'}`],
+            },
+          }
+        }
+      }
+    }
+  }
+
+  const requestBody = {
+    ...(title !== undefined && { title }),
+    events,
+    session_context: {
+      sources: gitSource ? [gitSource] : [],
+      outcomes: gitOutcome ? [gitOutcome] : [],
+      model: getMainLoopModel(),
+    },
+    environment_id: environmentId,
+    source: 'remote-control',
+    ...(permissionMode && { permission_mode: permissionMode }),
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  const url = `${baseUrlOverride ?? getOauthConfig().BASE_API_URL}/v1/sessions`
+  let response
+  try {
+    response = await axios.post(url, requestBody, {
+      headers,
+      signal,
+      validateStatus: s => s < 500,
+    })
+  } catch (err: unknown) {
+    logForDebugging(
+      `[bridge] Session creation request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+  const isSuccess = response.status === 200 || response.status === 201
+
+  if (!isSuccess) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[bridge] Session creation failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  const sessionData: unknown = response.data
+  if (
+    !sessionData ||
+    typeof sessionData !== 'object' ||
+    !('id' in sessionData) ||
+    typeof sessionData.id !== 'string'
+  ) {
+    logForDebugging('[bridge] No session ID in response')
+    return null
+  }
+
+  return sessionData.id
+}
+
+/**
+ * Fetch a bridge session via GET /v1/sessions/{id}.
+ *
+ * Returns the session's environment_id (for `--session-id` resume) and title.
+ * Uses the same org-scoped headers as create/archive — the environments-level
+ * client in bridgeApi.ts uses a different beta header and no org UUID, which
+ * makes the Sessions API return 404.
+ */
+export async function getBridgeSession(
+  sessionId: string,
+  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
+): Promise<{ environment_id?: string; title?: string } | null> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session fetch')
+    return null
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session fetch')
+    return null
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}`
+  logForDebugging(`[bridge] Fetching session ${sessionId}`)
+
+  let response
+  try {
+    response = await axios.get<{ environment_id?: string; title?: string }>(
+      url,
+      { headers, timeout: 10_000, validateStatus: s => s < 500 },
+    )
+  } catch (err: unknown) {
+    logForDebugging(
+      `[bridge] Session fetch request failed: ${errorMessage(err)}`,
+    )
+    return null
+  }
+
+  if (response.status !== 200) {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[bridge] Session fetch failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+    return null
+  }
+
+  return response.data
+}
+
+/**
+ * Archive a bridge session via POST /v1/sessions/{id}/archive.
+ *
+ * The CCR server never auto-archives sessions — archival is always an
+ * explicit client action. Both `claude remote-control` (standalone bridge) and the
+ * always-on `/remote-control` REPL bridge call this during shutdown to archive any
+ * sessions that are still alive.
+ *
+ * The archive endpoint accepts sessions in any status (running, idle,
+ * requires_action, pending) and returns 409 if already archived, making
+ * it safe to call even if the server-side runner already archived the
+ * session.
+ *
+ * Callers must handle errors — this function has no try/catch; 5xx,
+ * timeouts, and network errors throw. Archival is best-effort during
+ * cleanup; call sites wrap with .catch().
+ */
+export async function archiveBridgeSession(
+  sessionId: string,
+  opts?: {
+    baseUrl?: string
+    getAccessToken?: () => string | undefined
+    timeoutMs?: number
+  },
+): Promise<void> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session archive')
+    return
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session archive')
+    return
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${sessionId}/archive`
+  logForDebugging(`[bridge] Archiving session ${sessionId}`)
+
+  const response = await axios.post(
+    url,
+    {},
+    {
+      headers,
+      timeout: opts?.timeoutMs ?? 10_000,
+      validateStatus: s => s < 500,
+    },
+  )
+
+  if (response.status === 200) {
+    logForDebugging(`[bridge] Session ${sessionId} archived successfully`)
+  } else {
+    const detail = extractErrorDetail(response.data)
+    logForDebugging(
+      `[bridge] Session archive failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+    )
+  }
+}
+
+/**
+ * Update the title of a bridge session via PATCH /v1/sessions/{id}.
+ *
+ * Called when the user renames a session via /rename while a bridge
+ * connection is active, so the title stays in sync on claude.ai/code.
+ *
+ * Errors are swallowed — title sync is best-effort.
+ */
+export async function updateBridgeSessionTitle(
+  sessionId: string,
+  title: string,
+  opts?: { baseUrl?: string; getAccessToken?: () => string | undefined },
+): Promise<void> {
+  const { getClaudeAIOAuthTokens } = await import('../utils/auth.js')
+  const { getOrganizationUUID } = await import('../services/oauth/client.js')
+  const { getOauthConfig } = await import('../constants/oauth.js')
+  const { getOAuthHeaders } = await import('../utils/teleport/api.js')
+  const { default: axios } = await import('axios')
+
+  const accessToken =
+    opts?.getAccessToken?.() ?? getClaudeAIOAuthTokens()?.accessToken
+  if (!accessToken) {
+    logForDebugging('[bridge] No access token for session title update')
+    return
+  }
+
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logForDebugging('[bridge] No org UUID for session title update')
+    return
+  }
+
+  const headers = {
+    ...getOAuthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+
+  // Compat gateway only accepts session_* (compat/convert.go:27). v2 callers
+  // pass raw cse_*; retag here so all callers can pass whatever they hold.
+  // Idempotent for v1's session_* and bridgeMain's pre-converted compatSessionId.
+  const compatId = toCompatSessionId(sessionId)
+  const url = `${opts?.baseUrl ?? getOauthConfig().BASE_API_URL}/v1/sessions/${compatId}`
+  logForDebugging(`[bridge] Updating session title: ${compatId} → ${title}`)
+
+  try {
+    const response = await axios.patch(
+      url,
+      { title },
+      { headers, timeout: 10_000, validateStatus: s => s < 500 },
+    )
+
+    if (response.status === 200) {
+      logForDebugging(`[bridge] Session title updated successfully`)
+    } else {
+      const detail = extractErrorDetail(response.data)
+      logForDebugging(
+        `[bridge] Session title update failed with status ${response.status}${detail ? `: ${detail}` : ''}`,
+      )
+    }
+  } catch (err: unknown) {
+    logForDebugging(
+      `[bridge] Session title update request failed: ${errorMessage(err)}`,
+    )
+  }
+}

+ 141 - 0
src/bridge/debugUtils.ts

@@ -0,0 +1,141 @@
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+
+const DEBUG_MSG_LIMIT = 2000
+
+const SECRET_FIELD_NAMES = [
+  'session_ingress_token',
+  'environment_secret',
+  'access_token',
+  'secret',
+  'token',
+]
+
+const SECRET_PATTERN = new RegExp(
+  `"(${SECRET_FIELD_NAMES.join('|')})"\\s*:\\s*"([^"]*)"`,
+  'g',
+)
+
+const REDACT_MIN_LENGTH = 16
+
+export function redactSecrets(s: string): string {
+  return s.replace(SECRET_PATTERN, (_match, field: string, value: string) => {
+    if (value.length < REDACT_MIN_LENGTH) {
+      return `"${field}":"[REDACTED]"`
+    }
+    const redacted = `${value.slice(0, 8)}...${value.slice(-4)}`
+    return `"${field}":"${redacted}"`
+  })
+}
+
+/** Truncate a string for debug logging, collapsing newlines. */
+export function debugTruncate(s: string): string {
+  const flat = s.replace(/\n/g, '\\n')
+  if (flat.length <= DEBUG_MSG_LIMIT) {
+    return flat
+  }
+  return flat.slice(0, DEBUG_MSG_LIMIT) + `... (${flat.length} chars)`
+}
+
+/** Truncate a JSON-serializable value for debug logging. */
+export function debugBody(data: unknown): string {
+  const raw = typeof data === 'string' ? data : jsonStringify(data)
+  const s = redactSecrets(raw)
+  if (s.length <= DEBUG_MSG_LIMIT) {
+    return s
+  }
+  return s.slice(0, DEBUG_MSG_LIMIT) + `... (${s.length} chars)`
+}
+
+/**
+ * Extract a descriptive error message from an axios error (or any error).
+ * For HTTP errors, appends the server's response body message if available,
+ * since axios's default message only includes the status code.
+ */
+export function describeAxiosError(err: unknown): string {
+  const msg = errorMessage(err)
+  if (err && typeof err === 'object' && 'response' in err) {
+    const response = (err as { response?: { data?: unknown } }).response
+    if (response?.data && typeof response.data === 'object') {
+      const data = response.data as Record<string, unknown>
+      const detail =
+        typeof data.message === 'string'
+          ? data.message
+          : typeof data.error === 'object' &&
+              data.error &&
+              'message' in data.error &&
+              typeof (data.error as Record<string, unknown>).message ===
+                'string'
+            ? (data.error as Record<string, unknown>).message
+            : undefined
+      if (detail) {
+        return `${msg}: ${detail}`
+      }
+    }
+  }
+  return msg
+}
+
+/**
+ * Extract the HTTP status code from an axios error, if present.
+ * Returns undefined for non-HTTP errors (e.g. network failures).
+ */
+export function extractHttpStatus(err: unknown): number | undefined {
+  if (
+    err &&
+    typeof err === 'object' &&
+    'response' in err &&
+    (err as { response?: { status?: unknown } }).response &&
+    typeof (err as { response: { status?: unknown } }).response.status ===
+      'number'
+  ) {
+    return (err as { response: { status: number } }).response.status
+  }
+  return undefined
+}
+
+/**
+ * Pull a human-readable message out of an API error response body.
+ * Checks `data.message` first, then `data.error.message`.
+ */
+export function extractErrorDetail(data: unknown): string | undefined {
+  if (!data || typeof data !== 'object') return undefined
+  if ('message' in data && typeof data.message === 'string') {
+    return data.message
+  }
+  if (
+    'error' in data &&
+    data.error !== null &&
+    typeof data.error === 'object' &&
+    'message' in data.error &&
+    typeof data.error.message === 'string'
+  ) {
+    return data.error.message
+  }
+  return undefined
+}
+
+/**
+ * Log a bridge init skip — debug message + `tengu_bridge_repl_skipped`
+ * analytics event. Centralizes the event name and the AnalyticsMetadata
+ * cast so call sites don't each repeat the 5-line boilerplate.
+ */
+export function logBridgeSkip(
+  reason: string,
+  debugMsg?: string,
+  v2?: boolean,
+): void {
+  if (debugMsg) {
+    logForDebugging(debugMsg)
+  }
+  logEvent('tengu_bridge_repl_skipped', {
+    reason:
+      reason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    ...(v2 !== undefined && { v2 }),
+  })
+}

+ 165 - 0
src/bridge/envLessBridgeConfig.ts

@@ -0,0 +1,165 @@
+import { z } from 'zod/v4'
+import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import { lt } from '../utils/semver.js'
+import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
+
/**
 * Timing/behavior knobs for the env-less (v2) bridge. Values are fetched
 * from the `tengu_bridge_repl_v2_config` GrowthBook feature by
 * getEnvLessBridgeConfig and validated against envLessBridgeConfigSchema;
 * each field is documented inline below.
 */
export type EnvLessBridgeConfig = {
  // withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge)
  init_retry_max_attempts: number
  init_retry_base_delay_ms: number
  init_retry_jitter_fraction: number
  init_retry_max_delay_ms: number
  // axios timeout for POST /sessions, POST /bridge, POST /archive
  http_timeout_ms: number
  // BoundedUUIDSet ring size (echo + re-delivery dedup)
  uuid_dedup_buffer_size: number
  // CCRClient worker heartbeat cadence. Server TTL is 60s — 20s gives 3× margin.
  heartbeat_interval_ms: number
  // ±fraction of interval — per-beat jitter to spread fleet load.
  heartbeat_jitter_fraction: number
  // Fire proactive JWT refresh this long before expires_in. Larger buffer =
  // more frequent refresh (refresh cadence ≈ expires_in - buffer).
  token_refresh_buffer_ms: number
  // Archive POST timeout in teardown(). Distinct from http_timeout_ms because
  // gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
  // axios timeout on a slow/stalled archive burns the whole budget on a
  // request that forceExit will kill anyway.
  teardown_archive_timeout_ms: number
  // Deadline for onConnect after transport.connect(). If neither onConnect
  // nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
  // — the only telemetry for the ~1% of sessions that emit `started` then
  // go silent (no error, no event, just nothing).
  connect_timeout_ms: number
  // Semver floor for the env-less bridge path. Separate from the v1
  // tengu_bridge_min_version config so a v2-specific bug can force upgrades
  // without blocking v1 (env-based) clients, and vice versa.
  min_version: string
  // When true, tell users their claude.ai app may be too old to see v2
  // sessions — lets us roll the v2 bridge before the app ships the new
  // session-list query.
  should_show_app_upgrade_message: boolean
}
+
/**
 * Compiled-in fallback used when the GrowthBook value is absent or fails
 * schema validation (see getEnvLessBridgeConfig). Each value matches the
 * corresponding `.default()` in envLessBridgeConfigSchema, so remote and
 * local defaults can't drift apart silently within this file.
 */
export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
  init_retry_max_attempts: 3,
  init_retry_base_delay_ms: 500,
  init_retry_jitter_fraction: 0.25,
  init_retry_max_delay_ms: 4000,
  http_timeout_ms: 10_000,
  uuid_dedup_buffer_size: 2000,
  heartbeat_interval_ms: 20_000,
  heartbeat_jitter_fraction: 0.1,
  token_refresh_buffer_ms: 300_000,
  teardown_archive_timeout_ms: 1500,
  connect_timeout_ms: 15_000,
  min_version: '0.0.0',
  should_show_app_upgrade_message: false,
}
+
// Floors reject the whole object on violation (fall back to DEFAULT) rather
// than partially trusting — same defense-in-depth as pollConfig.ts.
const envLessBridgeConfigSchema = lazySchema(() =>
  z.object({
    init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
    init_retry_base_delay_ms: z.number().int().min(100).default(500),
    init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
    init_retry_max_delay_ms: z.number().int().min(500).default(4000),
    http_timeout_ms: z.number().int().min(2000).default(10_000),
    uuid_dedup_buffer_size: z.number().int().min(100).max(50_000).default(2000),
    // Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
    heartbeat_interval_ms: z
      .number()
      .int()
      .min(5000)
      .max(30_000)
      .default(20_000),
    // ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst case,
    // still under the 60s TTL.
    heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
    // Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
    // semantic inversion: ops entering expires_in-5min (the *delay until
    // refresh*) instead of 5min (the *buffer before expiry*) yields
    // delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
    // durations so .min() alone can't distinguish; .max() catches the
    // inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
    token_refresh_buffer_ms: z
      .number()
      .int()
      .min(30_000)
      .max(1_800_000)
      .default(300_000),
    // Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
    // timeout just lies to axios since forceExit kills the socket regardless.
    teardown_archive_timeout_ms: z
      .number()
      .int()
      .min(500)
      .max(2000)
      .default(1500),
    // Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
    // false-positive rate under transient slowness; cap 60s bounds how long
    // a truly-stalled session stays dark.
    connect_timeout_ms: z.number().int().min(5_000).max(60_000).default(15_000),
    min_version: z
      .string()
      // Parseability probe: the comparison result is discarded — lt() is
      // invoked only so the refine rejects strings it can't parse
      // (assumes lt throws on malformed semver; confirm in ../utils/semver.js).
      .refine(v => {
        try {
          lt(v, '0.0.0')
          return true
        } catch {
          return false
        }
      })
      .default('0.0.0'),
    should_show_app_upgrade_message: z.boolean().default(false),
  }),
)
+
+/**
+ * Fetch the env-less bridge timing config from GrowthBook. Read once per
+ * initEnvLessBridgeCore call — config is fixed for the lifetime of a bridge
+ * session.
+ *
+ * Uses the blocking getter (not _CACHED_MAY_BE_STALE) because /remote-control
+ * runs well after GrowthBook init — initializeGrowthBook() resolves instantly,
+ * so there's no startup penalty, and we get the fresh in-memory remoteEval
+ * value instead of the stale-on-first-read disk cache. The _DEPRECATED suffix
+ * warns against startup-path usage, which this isn't.
+ */
+export async function getEnvLessBridgeConfig(): Promise<EnvLessBridgeConfig> {
+  const raw = await getFeatureValue_DEPRECATED<unknown>(
+    'tengu_bridge_repl_v2_config',
+    DEFAULT_ENV_LESS_BRIDGE_CONFIG,
+  )
+  const parsed = envLessBridgeConfigSchema().safeParse(raw)
+  return parsed.success ? parsed.data : DEFAULT_ENV_LESS_BRIDGE_CONFIG
+}
+
+/**
+ * Returns an error message if the current CLI version is below the minimum
+ * required for the env-less (v2) bridge path, or null if the version is fine.
+ *
+ * v2 analogue of checkBridgeMinVersion() — reads from tengu_bridge_repl_v2_config
+ * instead of tengu_bridge_min_version so the two implementations can enforce
+ * independent floors.
+ */
+export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
+  const cfg = await getEnvLessBridgeConfig()
+  if (cfg.min_version && lt(MACRO.VERSION, cfg.min_version)) {
+    return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${cfg.min_version} or higher is required. Run \`claude update\` to update.`
+  }
+  return null
+}
+
+/**
+ * Whether to nudge users toward upgrading their claude.ai app when a
+ * Remote Control session starts. True only when the v2 bridge is active
+ * AND the should_show_app_upgrade_message config bit is set — lets us
+ * roll the v2 bridge before the app ships the new session-list query.
+ */
+export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
+  if (!isEnvLessBridgeEnabled()) return false
+  const cfg = await getEnvLessBridgeConfig()
+  return cfg.should_show_app_upgrade_message
+}

+ 71 - 0
src/bridge/flushGate.ts

@@ -0,0 +1,71 @@
+/**
+ * State machine for gating message writes during an initial flush.
+ *
+ * When a bridge session starts, historical messages are flushed to the
+ * server via a single HTTP POST. During that flush, new messages must
+ * be queued to prevent them from arriving at the server interleaved
+ * with the historical messages.
+ *
+ * Lifecycle:
+ *   start() → enqueue() returns true, items are queued
+ *   end()   → returns queued items for draining, enqueue() returns false
+ *   drop()  → discards queued items (permanent transport close)
+ *   deactivate() → clears active flag without dropping items
+ *                   (transport replacement — new transport will drain)
+ */
+export class FlushGate<T> {
+  private _active = false
+  private _pending: T[] = []
+
+  get active(): boolean {
+    return this._active
+  }
+
+  get pendingCount(): number {
+    return this._pending.length
+  }
+
+  /** Mark flush as in-progress. enqueue() will start queuing items. */
+  start(): void {
+    this._active = true
+  }
+
+  /**
+   * End the flush and return any queued items for draining.
+   * Caller is responsible for sending the returned items.
+   */
+  end(): T[] {
+    this._active = false
+    return this._pending.splice(0)
+  }
+
+  /**
+   * If flush is active, queue the items and return true.
+   * If flush is not active, return false (caller should send directly).
+   */
+  enqueue(...items: T[]): boolean {
+    if (!this._active) return false
+    this._pending.push(...items)
+    return true
+  }
+
+  /**
+   * Discard all queued items (permanent transport close).
+   * Returns the number of items dropped.
+   */
+  drop(): number {
+    this._active = false
+    const count = this._pending.length
+    this._pending.length = 0
+    return count
+  }
+
+  /**
+   * Clear the active flag without dropping queued items.
+   * Used when the transport is replaced (onWorkReceived) — the new
+   * transport's flush will drain the pending items.
+   */
+  deactivate(): void {
+    this._active = false
+  }
+}

+ 175 - 0
src/bridge/inboundAttachments.ts

@@ -0,0 +1,175 @@
+/**
+ * Resolve file_uuid attachments on inbound bridge user messages.
+ *
+ * Web composer uploads via cookie-authed /api/{org}/upload, sends file_uuid
+ * alongside the message. Here we fetch each via GET /api/oauth/files/{uuid}/content
+ * (oauth-authed, same store), write to ~/.claude/uploads/{sessionId}/, and
+ * return @path refs to prepend. Claude's Read tool takes it from there.
+ *
+ * Best-effort: any failure (no token, network, non-2xx, disk) logs debug and
+ * skips that attachment. The message still reaches Claude, just without @path.
+ */
+
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import axios from 'axios'
+import { randomUUID } from 'crypto'
+import { mkdir, writeFile } from 'fs/promises'
+import { basename, join } from 'path'
+import { z } from 'zod/v4'
+import { getSessionId } from '../bootstrap/state.js'
+import { logForDebugging } from '../utils/debug.js'
+import { getClaudeConfigHomeDir } from '../utils/envUtils.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import { getBridgeAccessToken, getBridgeBaseUrl } from './bridgeConfig.js'
+
// Hard cap per attachment download — passed as the axios `timeout` below.
const DOWNLOAD_TIMEOUT_MS = 30_000

// Tag every debug line with a stable prefix so they're greppable in logs.
function debug(msg: string): void {
  logForDebugging(`[bridge:inbound-attach] ${msg}`)
}

// Minimal shape of one web-composer attachment. lazySchema defers zod
// construction until first use (presumably a startup-cost pattern — see
// ../utils/lazySchema.js to confirm).
const attachmentSchema = lazySchema(() =>
  z.object({
    file_uuid: z.string(),
    file_name: z.string(),
  }),
)
// Whole-array form used to validate the raw `file_attachments` field.
const attachmentsArraySchema = lazySchema(() => z.array(attachmentSchema()))
+
+export type InboundAttachment = z.infer<ReturnType<typeof attachmentSchema>>
+
+/** Pull file_attachments off a loosely-typed inbound message. */
+export function extractInboundAttachments(msg: unknown): InboundAttachment[] {
+  if (typeof msg !== 'object' || msg === null || !('file_attachments' in msg)) {
+    return []
+  }
+  const parsed = attachmentsArraySchema().safeParse(msg.file_attachments)
+  return parsed.success ? parsed.data : []
+}
+
+/**
+ * Strip path components and keep only filename-safe chars. file_name comes
+ * from the network (web composer), so treat it as untrusted even though the
+ * composer controls it.
+ */
+function sanitizeFileName(name: string): string {
+  const base = basename(name).replace(/[^a-zA-Z0-9._-]/g, '_')
+  return base || 'attachment'
+}
+
// Per-session scratch dir: <claude config home>/uploads/<sessionId>.
// Session-scoped so concurrent sessions never share download paths.
function uploadsDir(): string {
  return join(getClaudeConfigHomeDir(), 'uploads', getSessionId())
}
+
+/**
+ * Fetch + write one attachment. Returns the absolute path on success,
+ * undefined on any failure.
+ */
+async function resolveOne(att: InboundAttachment): Promise<string | undefined> {
+  const token = getBridgeAccessToken()
+  if (!token) {
+    debug('skip: no oauth token')
+    return undefined
+  }
+
+  let data: Buffer
+  try {
+    // getOauthConfig() (via getBridgeBaseUrl) throws on a non-allowlisted
+    // CLAUDE_CODE_CUSTOM_OAUTH_URL — keep it inside the try so a bad
+    // FedStart URL degrades to "no @path" instead of crashing print.ts's
+    // reader loop (which has no catch around the await).
+    const url = `${getBridgeBaseUrl()}/api/oauth/files/${encodeURIComponent(att.file_uuid)}/content`
+    const response = await axios.get(url, {
+      headers: { Authorization: `Bearer ${token}` },
+      responseType: 'arraybuffer',
+      timeout: DOWNLOAD_TIMEOUT_MS,
+      validateStatus: () => true,
+    })
+    if (response.status !== 200) {
+      debug(`fetch ${att.file_uuid} failed: status=${response.status}`)
+      return undefined
+    }
+    data = Buffer.from(response.data)
+  } catch (e) {
+    debug(`fetch ${att.file_uuid} threw: ${e}`)
+    return undefined
+  }
+
+  // uuid-prefix makes collisions impossible across messages and within one
+  // (same filename, different files). 8 chars is enough — this isn't security.
+  const safeName = sanitizeFileName(att.file_name)
+  const prefix = (
+    att.file_uuid.slice(0, 8) || randomUUID().slice(0, 8)
+  ).replace(/[^a-zA-Z0-9_-]/g, '_')
+  const dir = uploadsDir()
+  const outPath = join(dir, `${prefix}-${safeName}`)
+
+  try {
+    await mkdir(dir, { recursive: true })
+    await writeFile(outPath, data)
+  } catch (e) {
+    debug(`write ${outPath} failed: ${e}`)
+    return undefined
+  }
+
+  debug(`resolved ${att.file_uuid} → ${outPath} (${data.length} bytes)`)
+  return outPath
+}
+
+/**
+ * Resolve all attachments on an inbound message to a prefix string of
+ * @path refs. Empty string if none resolved.
+ */
+export async function resolveInboundAttachments(
+  attachments: InboundAttachment[],
+): Promise<string> {
+  if (attachments.length === 0) return ''
+  debug(`resolving ${attachments.length} attachment(s)`)
+  const paths = await Promise.all(attachments.map(resolveOne))
+  const ok = paths.filter((p): p is string => p !== undefined)
+  if (ok.length === 0) return ''
+  // Quoted form — extractAtMentionedFiles truncates unquoted @refs at the
+  // first space, which breaks any home dir with spaces (/Users/John Smith/).
+  return ok.map(p => `@"${p}"`).join(' ') + ' '
+}
+
+/**
+ * Prepend @path refs to content, whichever form it's in.
+ * Targets the LAST text block — processUserInputBase reads inputString
+ * from processedBlocks[processedBlocks.length - 1], so putting refs in
+ * block[0] means they're silently ignored for [text, image] content.
+ */
+export function prependPathRefs(
+  content: string | Array<ContentBlockParam>,
+  prefix: string,
+): string | Array<ContentBlockParam> {
+  if (!prefix) return content
+  if (typeof content === 'string') return prefix + content
+  const i = content.findLastIndex(b => b.type === 'text')
+  if (i !== -1) {
+    const b = content[i]!
+    if (b.type === 'text') {
+      return [
+        ...content.slice(0, i),
+        { ...b, text: prefix + b.text },
+        ...content.slice(i + 1),
+      ]
+    }
+  }
+  // No text block — append one at the end so it's last.
+  return [...content, { type: 'text', text: prefix.trimEnd() }]
+}
+
+/**
+ * Convenience: extract + resolve + prepend. No-op when the message has no
+ * file_attachments field (fast path — no network, returns same reference).
+ */
+export async function resolveAndPrepend(
+  msg: unknown,
+  content: string | Array<ContentBlockParam>,
+): Promise<string | Array<ContentBlockParam>> {
+  const attachments = extractInboundAttachments(msg)
+  if (attachments.length === 0) return content
+  const prefix = await resolveInboundAttachments(attachments)
+  return prependPathRefs(content, prefix)
+}

+ 80 - 0
src/bridge/inboundMessages.ts

@@ -0,0 +1,80 @@
+import type {
+  Base64ImageSource,
+  ContentBlockParam,
+  ImageBlockParam,
+} from '@anthropic-ai/sdk/resources/messages.mjs'
+import type { UUID } from 'crypto'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import { detectImageFormatFromBase64 } from '../utils/imageResizer.js'
+
+/**
+ * Process an inbound user message from the bridge, extracting content
+ * and UUID for enqueueing. Supports both string content and
+ * ContentBlockParam[] (e.g. messages containing images).
+ *
+ * Normalizes image blocks from bridge clients that may use camelCase
+ * `mediaType` instead of snake_case `media_type` (mobile-apps#5825).
+ *
+ * Returns the extracted fields, or undefined if the message should be
+ * skipped (non-user type, missing/empty content).
+ */
+export function extractInboundMessageFields(
+  msg: SDKMessage,
+):
+  | { content: string | Array<ContentBlockParam>; uuid: UUID | undefined }
+  | undefined {
+  if (msg.type !== 'user') return undefined
+  const content = msg.message?.content
+  if (!content) return undefined
+  if (Array.isArray(content) && content.length === 0) return undefined
+
+  const uuid =
+    'uuid' in msg && typeof msg.uuid === 'string'
+      ? (msg.uuid as UUID)
+      : undefined
+
+  return {
+    content: Array.isArray(content) ? normalizeImageBlocks(content) : content,
+    uuid,
+  }
+}
+
+/**
+ * Normalize image content blocks from bridge clients. iOS/web clients may
+ * send `mediaType` (camelCase) instead of `media_type` (snake_case), or
+ * omit the field entirely. Without normalization, the bad block poisons
+ * the session — every subsequent API call fails with
+ * "media_type: Field required".
+ *
+ * Fast-path scan returns the original array reference when no
+ * normalization is needed (zero allocation on the happy path).
+ */
+export function normalizeImageBlocks(
+  blocks: Array<ContentBlockParam>,
+): Array<ContentBlockParam> {
+  if (!blocks.some(isMalformedBase64Image)) return blocks
+
+  return blocks.map(block => {
+    if (!isMalformedBase64Image(block)) return block
+    const src = block.source as unknown as Record<string, unknown>
+    const mediaType =
+      typeof src.mediaType === 'string' && src.mediaType
+        ? src.mediaType
+        : detectImageFormatFromBase64(block.source.data)
+    return {
+      ...block,
+      source: {
+        type: 'base64' as const,
+        media_type: mediaType as Base64ImageSource['media_type'],
+        data: block.source.data,
+      },
+    }
+  })
+}
+
+function isMalformedBase64Image(
+  block: ContentBlockParam,
+): block is ImageBlockParam & { source: Base64ImageSource } {
+  if (block.type !== 'image' || block.source?.type !== 'base64') return false
+  return !(block.source as unknown as Record<string, unknown>).media_type
+}

+ 569 - 0
src/bridge/initReplBridge.ts

@@ -0,0 +1,569 @@
+/**
+ * REPL-specific wrapper around initBridgeCore. Owns the parts that read
+ * bootstrap state — gates, cwd, session ID, git context, OAuth, title
+ * derivation — then delegates to the bootstrap-free core.
+ *
+ * Split out of replBridge.ts because the sessionStorage import
+ * (getCurrentSessionTitle) transitively pulls in src/commands.ts → the
+ * entire slash command + React component tree (~1300 modules). Keeping
+ * initBridgeCore in a file that doesn't touch sessionStorage lets
+ * daemonBridge.ts import the core without bloating the Agent SDK bundle.
+ *
+ * Called via dynamic import by useReplBridge (auto-start) and print.ts
+ * (SDK -p mode via query.enableRemoteControl).
+ */
+
+import { feature } from 'bun:bundle'
+import { hostname } from 'os'
+import { getOriginalCwd, getSessionId } from '../bootstrap/state.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type { SDKControlResponse } from '../entrypoints/sdk/controlTypes.js'
+import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
+import { getOrganizationUUID } from '../services/oauth/client.js'
+import {
+  isPolicyAllowed,
+  waitForPolicyLimitsToLoad,
+} from '../services/policyLimits/index.js'
+import type { Message } from '../types/message.js'
+import {
+  checkAndRefreshOAuthTokenIfNeeded,
+  getClaudeAIOAuthTokens,
+  handleOAuth401Error,
+} from '../utils/auth.js'
+import { getGlobalConfig, saveGlobalConfig } from '../utils/config.js'
+import { logForDebugging } from '../utils/debug.js'
+import { stripDisplayTagsAllowEmpty } from '../utils/displayTags.js'
+import { errorMessage } from '../utils/errors.js'
+import { getBranch, getRemoteUrl } from '../utils/git.js'
+import { toSDKMessages } from '../utils/messages/mappers.js'
+import {
+  getContentText,
+  getMessagesAfterCompactBoundary,
+  isSyntheticMessage,
+} from '../utils/messages.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+import { getCurrentSessionTitle } from '../utils/sessionStorage.js'
+import {
+  extractConversationText,
+  generateSessionTitle,
+} from '../utils/sessionTitle.js'
+import { generateShortWordSlug } from '../utils/words.js'
+import {
+  getBridgeAccessToken,
+  getBridgeBaseUrl,
+  getBridgeTokenOverride,
+} from './bridgeConfig.js'
+import {
+  checkBridgeMinVersion,
+  isBridgeEnabledBlocking,
+  isCseShimEnabled,
+  isEnvLessBridgeEnabled,
+} from './bridgeEnabled.js'
+import {
+  archiveBridgeSession,
+  createBridgeSession,
+  updateBridgeSessionTitle,
+} from './createSession.js'
+import { logBridgeSkip } from './debugUtils.js'
+import { checkEnvLessBridgeMinVersion } from './envLessBridgeConfig.js'
+import { getPollIntervalConfig } from './pollConfig.js'
+import type { BridgeState, ReplBridgeHandle } from './replBridge.js'
+import { initBridgeCore } from './replBridge.js'
+import { setCseShimGate } from './sessionIdCompat.js'
+import type { BridgeWorkerType } from './types.js'
+
/**
 * Options for initReplBridge. All fields are optional; callbacks wire
 * bridge events back into the caller (REPL or SDK -p mode).
 */
export type InitBridgeOptions = {
  // Invoked per inbound message from the bridge; may be async.
  onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
  // Invoked when a permission control response arrives — TODO confirm timing.
  onPermissionResponse?: (response: SDKControlResponse) => void
  // Remote interrupt request — presumably maps to Esc/abort; verify in caller.
  onInterrupt?: () => void
  // Remote model change; undefined semantics set by the caller.
  onSetModel?: (model: string | undefined) => void
  // Remote thinking-token budget change; null semantics set by the caller.
  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
  // Remote permission-mode change; caller returns ok:false with a reason
  // when the mode can't be applied.
  onSetPermissionMode?: (
    mode: PermissionMode,
  ) => { ok: true } | { ok: false; error: string }
  // Bridge connection state transitions (e.g. 'failed' with a detail hint).
  onStateChange?: (state: BridgeState, detail?: string) => void
  initialMessages?: Message[]
  // Explicit session name from `/remote-control <name>`. When set, overrides
  // the title derived from the conversation or /rename.
  initialName?: string
  // Fresh view of the full conversation at call time. Used by onUserMessage's
  // count-3 derivation to call generateSessionTitle over the full conversation.
  // Optional — print.ts's SDK enableRemoteControl path has no REPL message
  // array; count-3 falls back to the single message text when absent.
  getMessages?: () => Message[]
  // UUIDs already flushed in a prior bridge session. Messages with these
  // UUIDs are excluded from the initial flush to avoid poisoning the
  // server (duplicate UUIDs across sessions cause the WS to be killed).
  // Mutated in place — newly flushed UUIDs are added after each flush.
  previouslyFlushedUUIDs?: Set<string>
  /** See BridgeCoreParams.perpetual. */
  perpetual?: boolean
  /**
   * When true, the bridge only forwards events outbound (no SSE inbound
   * stream). Used by CCR mirror mode — local sessions visible on claude.ai
   * without enabling inbound control.
   */
  outboundOnly?: boolean
  // Free-form session tags forwarded to session creation — TODO confirm use.
  tags?: string[]
}
+
+export async function initReplBridge(
+  options?: InitBridgeOptions,
+): Promise<ReplBridgeHandle | null> {
+  const {
+    onInboundMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    initialMessages,
+    getMessages,
+    previouslyFlushedUUIDs,
+    initialName,
+    perpetual,
+    outboundOnly,
+    tags,
+  } = options ?? {}
+
+  // Wire the cse_ shim kill switch so toCompatSessionId respects the
+  // GrowthBook gate. Daemon/SDK paths skip this — shim defaults to active.
+  setCseShimGate(isCseShimEnabled)
+
+  // 1. Runtime gate
+  if (!(await isBridgeEnabledBlocking())) {
+    logBridgeSkip('not_enabled', '[bridge:repl] Skipping: bridge not enabled')
+    return null
+  }
+
+  // 1b. Minimum version check — deferred to after the v1/v2 branch below,
+  // since each implementation has its own floor (tengu_bridge_min_version
+  // for v1, tengu_bridge_repl_v2_config.min_version for v2).
+
+  // 2. Check OAuth — must be signed in with claude.ai. Runs before the
+  // policy check so console-auth users get the actionable "/login" hint
+  // instead of a misleading policy error from a stale/wrong-org cache.
+  if (!getBridgeAccessToken()) {
+    logBridgeSkip('no_oauth', '[bridge:repl] Skipping: no OAuth tokens')
+    onStateChange?.('failed', '/login')
+    return null
+  }
+
+  // 3. Check organization policy — remote control may be disabled
+  await waitForPolicyLimitsToLoad()
+  if (!isPolicyAllowed('allow_remote_control')) {
+    logBridgeSkip(
+      'policy_denied',
+      '[bridge:repl] Skipping: allow_remote_control policy not allowed',
+    )
+    onStateChange?.('failed', "disabled by your organization's policy")
+    return null
+  }
+
+  // When CLAUDE_BRIDGE_OAUTH_TOKEN is set (ant-only local dev), the bridge
+  // uses that token directly via getBridgeAccessToken() — keychain state is
+  // irrelevant. Skip 2b/2c to preserve that decoupling: an expired keychain
+  // token shouldn't block a bridge connection that doesn't use it.
+  if (!getBridgeTokenOverride()) {
+    // 2a. Cross-process backoff. If N prior processes already saw this exact
+    // dead token (matched by expiresAt), skip silently — no event, no refresh
+    // attempt. The count threshold tolerates transient refresh failures (auth
+    // server 5xx, lockfile errors per auth.ts:1437/1444/1485): each process
+    // independently retries until 3 consecutive failures prove the token dead.
+    // Mirrors useReplBridge's MAX_CONSECUTIVE_INIT_FAILURES for in-process.
+    // The expiresAt key is content-addressed: /login → new token → new expiresAt
+    // → this stops matching without any explicit clear.
+    const cfg = getGlobalConfig()
+    if (
+      cfg.bridgeOauthDeadExpiresAt != null &&
+      (cfg.bridgeOauthDeadFailCount ?? 0) >= 3 &&
+      getClaudeAIOAuthTokens()?.expiresAt === cfg.bridgeOauthDeadExpiresAt
+    ) {
+      logForDebugging(
+        `[bridge:repl] Skipping: cross-process backoff (dead token seen ${cfg.bridgeOauthDeadFailCount} times)`,
+      )
+      return null
+    }
+
+    // 2b. Proactively refresh if expired. Mirrors bridgeMain.ts:2096 — the REPL
+    // bridge fires at useEffect mount BEFORE any v1/messages call, making this
+    // usually the first OAuth request of the session. Without this, ~9% of
+    // registrations hit the server with a >8h-expired token → 401 → withOAuthRetry
+    // recovers, but the server logs a 401 we can avoid. VPN egress IPs observed
+    // at 30:1 401:200 when many unrelated users cluster at the 8h TTL boundary.
+    //
+    // Fresh-token cost: one memoized read + one Date.now() comparison (~µs).
+    // checkAndRefreshOAuthTokenIfNeeded clears its own cache in every path that
+    // touches the keychain (refresh success, lockfile race, throw), so no
+    // explicit clearOAuthTokenCache() here — that would force a blocking
+    // keychain spawn on the 91%+ fresh-token path.
+    await checkAndRefreshOAuthTokenIfNeeded()
+
+    // 2c. Skip if token is still expired post-refresh-attempt. Env-var / FD
+    // tokens (auth.ts:894-917) have expiresAt=null → never trip this. But a
+    // keychain token whose refresh token is dead (password change, org left,
+    // token GC'd) has expiresAt<now AND refresh just failed — the client would
+    // otherwise loop 401 forever: withOAuthRetry → handleOAuth401Error →
+    // refresh fails again → retry with same stale token → 401 again.
+    // Datadog 2026-03-08: single IPs generating 2,879 such 401s/day. Skip the
+    // guaranteed-fail API call; useReplBridge surfaces the failure.
+    //
+    // Intentionally NOT using isOAuthTokenExpired here — that has a 5-minute
+    // proactive-refresh buffer, which is the right heuristic for "should
+    // refresh soon" but wrong for "provably unusable". A token with 3min left
+    // + transient refresh endpoint blip (5xx/timeout/wifi-reconnect) would
+    // falsely trip a buffered check; the still-valid token would connect fine.
+    // Check actual expiry instead: past-expiry AND refresh-failed → truly dead.
+    const tokens = getClaudeAIOAuthTokens()
+    if (tokens && tokens.expiresAt !== null && tokens.expiresAt <= Date.now()) {
+      logBridgeSkip(
+        'oauth_expired_unrefreshable',
+        '[bridge:repl] Skipping: OAuth token expired and refresh failed (re-login required)',
+      )
+      onStateChange?.('failed', '/login')
+      // Persist for the next process. Increments failCount when re-discovering
+      // the same dead token (matched by expiresAt); resets to 1 for a different
+      // token. Once count reaches 3, step 2a's early-return fires and this path
+      // is never reached again — writes are capped at 3 per dead token.
+      // Local const captures the narrowed type (closure loses !==null narrowing).
+      const deadExpiresAt = tokens.expiresAt
+      saveGlobalConfig(c => ({
+        ...c,
+        bridgeOauthDeadExpiresAt: deadExpiresAt,
+        bridgeOauthDeadFailCount:
+          c.bridgeOauthDeadExpiresAt === deadExpiresAt
+            ? (c.bridgeOauthDeadFailCount ?? 0) + 1
+            : 1,
+      }))
+      return null
+    }
+  }
+
+  // 4. Compute baseUrl — needed by both v1 (env-based) and v2 (env-less)
+  // paths. Hoisted above the v2 gate so both can use it.
+  const baseUrl = getBridgeBaseUrl()
+
+  // 5. Derive session title. Precedence: explicit initialName → /rename
+  // (session storage) → last meaningful user message → generated slug.
+  // Cosmetic only (claude.ai session list); the model never sees it.
+  // Two flags: `hasExplicitTitle` (initialName or /rename — never auto-
+  // overwrite) vs. `hasTitle` (any title, including auto-derived — blocks
+  // the count-1 re-derivation but not count-3). The onUserMessage callback
+  // (wired to both v1 and v2 below) derives from the 1st prompt and again
+  // from the 3rd so mobile/web show a title that reflects more context.
+  // The slug fallback (e.g. "remote-control-graceful-unicorn") makes
+  // auto-started sessions distinguishable in the claude.ai list before the
+  // first prompt.
+  let title = `remote-control-${generateShortWordSlug()}`
+  let hasTitle = false
+  let hasExplicitTitle = false
+  if (initialName) {
+    title = initialName
+    hasTitle = true
+    hasExplicitTitle = true
+  } else {
+    const sessionId = getSessionId()
+    const customTitle = sessionId
+      ? getCurrentSessionTitle(sessionId)
+      : undefined
+    if (customTitle) {
+      title = customTitle
+      hasTitle = true
+      hasExplicitTitle = true
+    } else if (initialMessages && initialMessages.length > 0) {
+      // Find the last user message that has meaningful content. Skip meta
+      // (nudges), tool results, compact summaries ("This session is being
+      // continued…"), non-human origins (task notifications, channel pushes),
+      // and synthetic interrupts ([Request interrupted by user]) — none are
+      // human-authored. Same filter as extractTitleText + isSyntheticMessage.
+      for (let i = initialMessages.length - 1; i >= 0; i--) {
+        const msg = initialMessages[i]!
+        if (
+          msg.type !== 'user' ||
+          msg.isMeta ||
+          msg.toolUseResult ||
+          msg.isCompactSummary ||
+          (msg.origin && msg.origin.kind !== 'human') ||
+          isSyntheticMessage(msg)
+        )
+          continue
+        const rawContent = getContentText(msg.message.content)
+        if (!rawContent) continue
+        const derived = deriveTitle(rawContent)
+        if (!derived) continue
+        title = derived
+        hasTitle = true
+        break
+      }
+    }
+  }
+
+  // Shared by both v1 and v2 — fires on every title-worthy user message until
+  // it returns true. At count 1: deriveTitle placeholder immediately, then
+  // generateSessionTitle (Haiku, sentence-case) fire-and-forget upgrade. At
+  // count 3: re-generate over the full conversation. Skips entirely if the
+  // title is explicit (/remote-control <name> or /rename) — re-checks
+  // sessionStorage at call time so /rename between messages isn't clobbered.
+  // Skips count 1 if initialMessages already derived (that title is fresh);
+  // still refreshes at count 3. v2 passes cse_*; updateBridgeSessionTitle
+  // retags internally.
+  let userMessageCount = 0
+  let lastBridgeSessionId: string | undefined
+  let genSeq = 0
+  const patch = (
+    derived: string,
+    bridgeSessionId: string,
+    atCount: number,
+  ): void => {
+    hasTitle = true
+    title = derived
+    logForDebugging(
+      `[bridge:repl] derived title from message ${atCount}: ${derived}`,
+    )
+    void updateBridgeSessionTitle(bridgeSessionId, derived, {
+      baseUrl,
+      getAccessToken: getBridgeAccessToken,
+    }).catch(() => {})
+  }
+  // Fire-and-forget Haiku generation with post-await guards. Re-checks /rename
+  // (sessionStorage), v1 env-lost (lastBridgeSessionId), and same-session
+  // out-of-order resolution (genSeq — count-1's Haiku resolving after count-3
+  // would clobber the richer title). generateSessionTitle never rejects.
+  const generateAndPatch = (input: string, bridgeSessionId: string): void => {
+    const gen = ++genSeq
+    const atCount = userMessageCount
+    void generateSessionTitle(input, AbortSignal.timeout(15_000)).then(
+      generated => {
+        if (
+          generated &&
+          gen === genSeq &&
+          lastBridgeSessionId === bridgeSessionId &&
+          !getCurrentSessionTitle(getSessionId())
+        ) {
+          patch(generated, bridgeSessionId, atCount)
+        }
+      },
+    )
+  }
+  const onUserMessage = (text: string, bridgeSessionId: string): boolean => {
+    if (hasExplicitTitle || getCurrentSessionTitle(getSessionId())) {
+      return true
+    }
+    // v1 env-lost re-creates the session with a new ID. Reset the count so
+    // the new session gets its own count-3 derivation; hasTitle stays true
+    // (new session was created via getCurrentTitle(), which reads the count-1
+    // title from this closure), so count-1 of the fresh cycle correctly skips.
+    if (
+      lastBridgeSessionId !== undefined &&
+      lastBridgeSessionId !== bridgeSessionId
+    ) {
+      userMessageCount = 0
+    }
+    lastBridgeSessionId = bridgeSessionId
+    userMessageCount++
+    if (userMessageCount === 1 && !hasTitle) {
+      const placeholder = deriveTitle(text)
+      if (placeholder) patch(placeholder, bridgeSessionId, userMessageCount)
+      generateAndPatch(text, bridgeSessionId)
+    } else if (userMessageCount === 3) {
+      const msgs = getMessages?.()
+      const input = msgs
+        ? extractConversationText(getMessagesAfterCompactBoundary(msgs))
+        : text
+      generateAndPatch(input, bridgeSessionId)
+    }
+    // Also re-latches if v1 env-lost resets the transport's done flag past 3.
+    return userMessageCount >= 3
+  }
+
+  const initialHistoryCap = getFeatureValue_CACHED_WITH_REFRESH(
+    'tengu_bridge_initial_history_cap',
+    200,
+    5 * 60 * 1000,
+  )
+
+  // Fetch orgUUID before the v1/v2 branch — both paths need it. v1 for
+  // environment registration; v2 for archive (which lives at the compat
+  // /v1/sessions/{id}/archive, not /v1/code/sessions). Without it, v2
+  // archive 404s and sessions stay alive in CCR after /exit.
+  const orgUUID = await getOrganizationUUID()
+  if (!orgUUID) {
+    logBridgeSkip('no_org_uuid', '[bridge:repl] Skipping: no org UUID')
+    onStateChange?.('failed', '/login')
+    return null
+  }
+
+  // ── GrowthBook gate: env-less bridge ──────────────────────────────────
+  // When enabled, skips the Environments API layer entirely (no register/
+  // poll/ack/heartbeat) and connects directly via POST /bridge → worker_jwt.
+  // See server PR #292605 (renamed in #293280). REPL-only — daemon/print stay
+  // on env-based.
+  //
+  // NAMING: "env-less" is distinct from "CCR v2" (the /worker/* transport).
+  // The env-based path below can ALSO use CCR v2 via CLAUDE_CODE_USE_CCR_V2.
+  // tengu_bridge_repl_v2 gates env-less (no poll loop), not transport version.
+  //
+  // perpetual (assistant-mode session continuity via bridge-pointer.json) is
+  // env-coupled and not yet implemented here — fall back to env-based when set
+  // so KAIROS users don't silently lose cross-restart continuity.
+  if (isEnvLessBridgeEnabled() && !perpetual) {
+    const versionError = await checkEnvLessBridgeMinVersion()
+    if (versionError) {
+      logBridgeSkip(
+        'version_too_old',
+        `[bridge:repl] Skipping: ${versionError}`,
+        true,
+      )
+      onStateChange?.('failed', 'run `claude update` to upgrade')
+      return null
+    }
+    logForDebugging(
+      '[bridge:repl] Using env-less bridge path (tengu_bridge_repl_v2)',
+    )
+    const { initEnvLessBridgeCore } = await import('./remoteBridgeCore.js')
+    return initEnvLessBridgeCore({
+      baseUrl,
+      orgUUID,
+      title,
+      getAccessToken: getBridgeAccessToken,
+      onAuth401: handleOAuth401Error,
+      toSDKMessages,
+      initialHistoryCap,
+      initialMessages,
+      // v2 always creates a fresh server session (new cse_* id), so
+      // previouslyFlushedUUIDs is not passed — there's no cross-session
+      // UUID collision risk, and the ref persists across enable→disable→
+      // re-enable cycles which would cause the new session to receive zero
+      // history (all UUIDs already in the set from the prior enable).
+      // v1 handles this by calling previouslyFlushedUUIDs.clear() on fresh
+      // session creation (replBridge.ts:768); v2 skips the param entirely.
+      onInboundMessage,
+      onUserMessage,
+      onPermissionResponse,
+      onInterrupt,
+      onSetModel,
+      onSetMaxThinkingTokens,
+      onSetPermissionMode,
+      onStateChange,
+      outboundOnly,
+      tags,
+    })
+  }
+
+  // ── v1 path: env-based (register/poll/ack/heartbeat) ──────────────────
+
+  const versionError = checkBridgeMinVersion()
+  if (versionError) {
+    logBridgeSkip('version_too_old', `[bridge:repl] Skipping: ${versionError}`)
+    onStateChange?.('failed', 'run `claude update` to upgrade')
+    return null
+  }
+
+  // Gather git context — this is the bootstrap-read boundary.
+  // Everything from here down is passed explicitly to bridgeCore.
+  const branch = await getBranch()
+  const gitRepoUrl = await getRemoteUrl()
+  const sessionIngressUrl =
+    process.env.USER_TYPE === 'ant' &&
+    process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      ? process.env.CLAUDE_BRIDGE_SESSION_INGRESS_URL
+      : baseUrl
+
+  // Assistant-mode sessions advertise a distinct worker_type so the web UI
+  // can filter them into a dedicated picker. KAIROS guard keeps the
+  // assistant module out of external builds entirely.
+  let workerType: BridgeWorkerType = 'claude_code'
+  if (feature('KAIROS')) {
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { isAssistantMode } =
+      require('../assistant/index.js') as typeof import('../assistant/index.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    if (isAssistantMode()) {
+      workerType = 'claude_code_assistant'
+    }
+  }
+
+  // 6. Delegate. BridgeCoreHandle is a structural superset of
+  // ReplBridgeHandle (adds writeSdkMessages which REPL callers don't use),
+  // so no adapter needed — just the narrower type on the way out.
+  return initBridgeCore({
+    dir: getOriginalCwd(),
+    machineName: hostname(),
+    branch,
+    gitRepoUrl,
+    title,
+    baseUrl,
+    sessionIngressUrl,
+    workerType,
+    getAccessToken: getBridgeAccessToken,
+    createSession: opts =>
+      createBridgeSession({
+        ...opts,
+        events: [],
+        baseUrl,
+        getAccessToken: getBridgeAccessToken,
+      }),
+    archiveSession: sessionId =>
+      archiveBridgeSession(sessionId, {
+        baseUrl,
+        getAccessToken: getBridgeAccessToken,
+        // gracefulShutdown.ts:407 races runCleanupFunctions against 2s.
+        // Teardown also does stopWork (parallel) + deregister (sequential),
+        // so archive can't have the full budget. 1.5s matches v2's
+        // teardown_archive_timeout_ms default.
+        timeoutMs: 1500,
+      }).catch((err: unknown) => {
+        // archiveBridgeSession has no try/catch — 5xx/timeout/network throw
+        // straight through. Previously swallowed silently, making archive
+        // failures BQ-invisible and undiagnosable from debug logs.
+        logForDebugging(
+          `[bridge:repl] archiveBridgeSession threw: ${errorMessage(err)}`,
+          { level: 'error' },
+        )
+      }),
+    // getCurrentTitle is read on reconnect-after-env-lost to re-title the new
+    // session. /rename writes to session storage; onUserMessage mutates
+    // `title` directly — both paths are picked up here.
+    getCurrentTitle: () => getCurrentSessionTitle(getSessionId()) ?? title,
+    onUserMessage,
+    toSDKMessages,
+    onAuth401: handleOAuth401Error,
+    getPollIntervalConfig,
+    initialHistoryCap,
+    initialMessages,
+    previouslyFlushedUUIDs,
+    onInboundMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    perpetual,
+  })
+}
+
+const TITLE_MAX_LEN = 50
+
+/**
+ * Quick placeholder title: strip display tags, take the first sentence,
+ * collapse whitespace, truncate to 50 chars. Returns undefined if the result
+ * is empty (e.g. message was only <local-command-stdout>). Replaced by
+ * generateSessionTitle once Haiku resolves (~1-15s).
+ */
+function deriveTitle(raw: string): string | undefined {
+  // Strip <ide_opened_file>, <session-start-hook>, etc. — these appear in
+  // user messages when IDE/hooks inject context. stripDisplayTagsAllowEmpty
+  // returns '' (not the original) so pure-tag messages are skipped.
+  const clean = stripDisplayTagsAllowEmpty(raw)
+  // First sentence is usually the intent; rest is often context/detail.
+  // Capture group instead of lookbehind — keeps YARR JIT happy.
+  const firstSentence = /^(.*?[.!?])\s/.exec(clean)?.[1] ?? clean
+  // Collapse newlines/tabs — titles are single-line in the claude.ai list.
+  const flat = firstSentence.replace(/\s+/g, ' ').trim()
+  if (!flat) return undefined
+  return flat.length > TITLE_MAX_LEN
+    ? flat.slice(0, TITLE_MAX_LEN - 1) + '\u2026'
+    : flat
+}

+ 256 - 0
src/bridge/jwtUtils.ts

@@ -0,0 +1,256 @@
+import { logEvent } from '../services/analytics/index.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { errorMessage } from '../utils/errors.js'
+import { jsonParse } from '../utils/slowOperations.js'
+
+/** Format a millisecond duration as a human-readable string (e.g. "5m 30s"). */
+function formatDuration(ms: number): string {
+  if (ms < 60_000) return `${Math.round(ms / 1000)}s`
+  const m = Math.floor(ms / 60_000)
+  const s = Math.round((ms % 60_000) / 1000)
+  return s > 0 ? `${m}m ${s}s` : `${m}m`
+}
+
+/**
+ * Decode a JWT's payload segment without verifying the signature.
+ * Strips the `sk-ant-si-` session-ingress prefix if present.
+ * Returns the parsed JSON payload as `unknown`, or `null` if the
+ * token is malformed or the payload is not valid JSON.
+ */
+export function decodeJwtPayload(token: string): unknown | null {
+  const jwt = token.startsWith('sk-ant-si-')
+    ? token.slice('sk-ant-si-'.length)
+    : token
+  const parts = jwt.split('.')
+  if (parts.length !== 3 || !parts[1]) return null
+  try {
+    return jsonParse(Buffer.from(parts[1], 'base64url').toString('utf8'))
+  } catch {
+    return null
+  }
+}
+
+/**
+ * Decode the `exp` (expiry) claim from a JWT without verifying the signature.
+ * @returns The `exp` value in Unix seconds, or `null` if unparseable
+ */
+export function decodeJwtExpiry(token: string): number | null {
+  const payload = decodeJwtPayload(token)
+  if (
+    payload !== null &&
+    typeof payload === 'object' &&
+    'exp' in payload &&
+    typeof payload.exp === 'number'
+  ) {
+    return payload.exp
+  }
+  return null
+}
+
/** Refresh buffer: request a new token this long before expiry (5 minutes). */
const TOKEN_REFRESH_BUFFER_MS = 5 * 60 * 1000

/** Fallback refresh interval when the new token's expiry is unknown. */
const FALLBACK_REFRESH_INTERVAL_MS = 30 * 60 * 1000 // 30 minutes

/** Max consecutive failures before giving up on the refresh chain. */
const MAX_REFRESH_FAILURES = 3

/** Retry delay when getAccessToken returns undefined. */
const REFRESH_RETRY_DELAY_MS = 60_000 // 1 minute
+
/**
 * Creates a token refresh scheduler that proactively refreshes session tokens
 * before they expire. Used by both the standalone bridge and the REPL bridge.
 *
 * When a token is about to expire, the scheduler calls `onRefresh` with the
 * session ID and the bridge's OAuth access token. The caller is responsible
 * for delivering the token to the appropriate transport (child process stdin
 * for standalone bridge, WebSocket reconnect for REPL bridge).
 *
 * Concurrency model: single-threaded event loop plus a per-session
 * generation counter. Every (re)schedule or cancel bumps the generation;
 * a doRefresh that awoke under an older generation drops out after its
 * await instead of installing follow-up timers.
 */
export function createTokenRefreshScheduler({
  getAccessToken,
  onRefresh,
  label,
  refreshBufferMs = TOKEN_REFRESH_BUFFER_MS,
}: {
  /** Source of the bridge OAuth token; sync or async, may yield undefined. */
  getAccessToken: () => string | undefined | Promise<string | undefined>
  /** Delivery hook — receives (sessionId, fresh OAuth token) per refresh. */
  onRefresh: (sessionId: string, oauthToken: string) => void
  /** Log prefix tag — appears as `[label:token]` in debug lines. */
  label: string
  /** How long before expiry to fire refresh. Defaults to 5 min. */
  refreshBufferMs?: number
}): {
  schedule: (sessionId: string, token: string) => void
  scheduleFromExpiresIn: (sessionId: string, expiresInSeconds: number) => void
  cancel: (sessionId: string) => void
  cancelAll: () => void
} {
  // At most one pending timer per session; (re)schedules replace it.
  const timers = new Map<string, ReturnType<typeof setTimeout>>()
  const failureCounts = new Map<string, number>()
  // Generation counter per session — incremented by schedule() and cancel()
  // so that in-flight async doRefresh() calls can detect when they've been
  // superseded and should skip setting follow-up timers.
  // NOTE: entries are intentionally never deleted (see cancel/cancelAll).
  // Deleting one would restart its counter at 1 on the next schedule(),
  // letting a pre-cancel in-flight doRefresh that holds gen 1 wrongly pass
  // the staleness check.
  const generations = new Map<string, number>()

  // Bump and return the session's generation, invalidating any older
  // in-flight doRefresh for that session.
  function nextGeneration(sessionId: string): number {
    const gen = (generations.get(sessionId) ?? 0) + 1
    generations.set(sessionId, gen)
    return gen
  }

  // Schedule a refresh from the JWT's own `exp` claim (decoded, unverified).
  function schedule(sessionId: string, token: string): void {
    const expiry = decodeJwtExpiry(token)
    if (!expiry) {
      // Token is not a decodable JWT (e.g. an OAuth token passed from the
      // REPL bridge WebSocket open handler). Preserve any existing timer
      // (such as the follow-up refresh set by doRefresh) so the refresh
      // chain is not broken. (The falsy check also treats exp === 0 as
      // undecodable — fine, since an epoch-zero expiry is nonsensical.)
      logForDebugging(
        `[${label}:token] Could not decode JWT expiry for sessionId=${sessionId}, token prefix=${token.slice(0, 15)}…, keeping existing timer`,
      )
      return
    }

    // Clear any existing refresh timer — we have a concrete expiry to replace it.
    const existing = timers.get(sessionId)
    if (existing) {
      clearTimeout(existing)
    }

    // Bump generation to invalidate any in-flight async doRefresh.
    const gen = nextGeneration(sessionId)

    const expiryDate = new Date(expiry * 1000).toISOString()
    const delayMs = expiry * 1000 - Date.now() - refreshBufferMs
    if (delayMs <= 0) {
      logForDebugging(
        `[${label}:token] Token for sessionId=${sessionId} expires=${expiryDate} (past or within buffer), refreshing immediately`,
      )
      // Fire-and-forget: doRefresh installs its own follow-up timer. Any
      // stale timers-map entry (pointing at the just-cleared handle) is
      // harmless — a later clearTimeout on it is a no-op.
      void doRefresh(sessionId, gen)
      return
    }

    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires=${expiryDate}, buffer=${refreshBufferMs / 1000}s)`,
    )

    // setTimeout's extra arguments are forwarded to doRefresh(sessionId, gen).
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  /**
   * Schedule refresh using an explicit TTL (seconds until expiry) rather
   * than decoding a JWT's exp claim. Used by callers whose JWT is opaque
   * (e.g. POST /v1/code/sessions/{id}/bridge returns expires_in directly).
   */
  function scheduleFromExpiresIn(
    sessionId: string,
    expiresInSeconds: number,
  ): void {
    const existing = timers.get(sessionId)
    if (existing) clearTimeout(existing)
    const gen = nextGeneration(sessionId)
    // Clamp to 30s floor — if refreshBufferMs exceeds the server's expires_in
    // (e.g. very large buffer for frequent-refresh testing, or server shortens
    // expires_in unexpectedly), unclamped delayMs ≤ 0 would tight-loop.
    const delayMs = Math.max(expiresInSeconds * 1000 - refreshBufferMs, 30_000)
    logForDebugging(
      `[${label}:token] Scheduled token refresh for sessionId=${sessionId} in ${formatDuration(delayMs)} (expires_in=${expiresInSeconds}s, buffer=${refreshBufferMs / 1000}s)`,
    )
    const timer = setTimeout(doRefresh, delayMs, sessionId, gen)
    timers.set(sessionId, timer)
  }

  // The refresh workhorse. Fetches a fresh OAuth token, hands it to
  // onRefresh, and chains the next refresh. `gen` is the generation this
  // call was scheduled under; if it no longer matches after the await, a
  // newer schedule()/cancel() superseded this call and it must not install
  // any further timers.
  async function doRefresh(sessionId: string, gen: number): Promise<void> {
    let oauthToken: string | undefined
    try {
      oauthToken = await getAccessToken()
    } catch (err) {
      // Swallow and fall through — the !oauthToken branch below handles
      // retry/backoff uniformly for both "threw" and "returned undefined".
      logForDebugging(
        `[${label}:token] getAccessToken threw for sessionId=${sessionId}: ${errorMessage(err)}`,
        { level: 'error' },
      )
    }

    // If the session was cancelled or rescheduled while we were awaiting,
    // the generation will have changed — bail out to avoid orphaned timers.
    if (generations.get(sessionId) !== gen) {
      logForDebugging(
        `[${label}:token] doRefresh for sessionId=${sessionId} stale (gen ${gen} vs ${generations.get(sessionId)}), skipping`,
      )
      return
    }

    if (!oauthToken) {
      const failures = (failureCounts.get(sessionId) ?? 0) + 1
      failureCounts.set(sessionId, failures)
      logForDebugging(
        `[${label}:token] No OAuth token available for refresh, sessionId=${sessionId} (failure ${failures}/${MAX_REFRESH_FAILURES})`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'bridge_token_refresh_no_oauth')
      // Schedule a retry so the refresh chain can recover if the token
      // becomes available again (e.g. transient cache clear during refresh).
      // Cap retries to avoid spamming on genuine failures. Retries reuse
      // `gen` on purpose — a retry is a continuation of this refresh, not a
      // new schedule.
      if (failures < MAX_REFRESH_FAILURES) {
        const retryTimer = setTimeout(
          doRefresh,
          REFRESH_RETRY_DELAY_MS,
          sessionId,
          gen,
        )
        timers.set(sessionId, retryTimer)
      }
      return
    }

    // Reset failure counter on successful token retrieval
    failureCounts.delete(sessionId)

    logForDebugging(
      `[${label}:token] Refreshing token for sessionId=${sessionId}: new token prefix=${oauthToken.slice(0, 15)}…`,
    )
    logEvent('tengu_bridge_token_refreshed', {})
    onRefresh(sessionId, oauthToken)

    // Schedule a follow-up refresh so long-running sessions stay authenticated.
    // Without this, the initial one-shot timer leaves the session vulnerable
    // to token expiry if it runs past the first refresh window. If onRefresh
    // leads the caller back into schedule() with a decodable JWT, that call
    // replaces this fallback timer with an expiry-derived one.
    const timer = setTimeout(
      doRefresh,
      FALLBACK_REFRESH_INTERVAL_MS,
      sessionId,
      gen,
    )
    timers.set(sessionId, timer)
    logForDebugging(
      `[${label}:token] Scheduled follow-up refresh for sessionId=${sessionId} in ${formatDuration(FALLBACK_REFRESH_INTERVAL_MS)}`,
    )
  }

  // Stop refreshing one session: invalidate in-flight work, kill the
  // pending timer, reset its failure count. The generations entry is
  // retained on purpose (see the note on `generations` above).
  function cancel(sessionId: string): void {
    // Bump generation to invalidate any in-flight async doRefresh.
    nextGeneration(sessionId)
    const timer = timers.get(sessionId)
    if (timer) {
      clearTimeout(timer)
      timers.delete(sessionId)
    }
    failureCounts.delete(sessionId)
  }

  // Stop refreshing every session (shutdown path). Generations entries are
  // retained for the same staleness-check reason as in cancel().
  function cancelAll(): void {
    // Bump all generations so in-flight doRefresh calls are invalidated.
    for (const sessionId of generations.keys()) {
      nextGeneration(sessionId)
    }
    for (const timer of timers.values()) {
      clearTimeout(timer)
    }
    timers.clear()
    failureCounts.clear()
  }

  return { schedule, scheduleFromExpiresIn, cancel, cancelAll }
}

+ 110 - 0
src/bridge/pollConfig.ts

@@ -0,0 +1,110 @@
+import { z } from 'zod/v4'
+import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
+import { lazySchema } from '../utils/lazySchema.js'
+import {
+  DEFAULT_POLL_CONFIG,
+  type PollIntervalConfig,
+} from './pollConfigDefaults.js'
+
+// .min(100) on the seek-work intervals restores the old Math.max(..., 100)
+// defense-in-depth floor against fat-fingered GrowthBook values. Unlike a
+// clamp, Zod rejects the whole object on violation — a config with one bad
+// field falls back to DEFAULT_POLL_CONFIG entirely rather than being
+// partially trusted.
+//
+// The at_capacity intervals use a 0-or-≥100 refinement: 0 means "disabled"
+// (heartbeat-only mode), ≥100 is the fat-finger floor. Values 1–99 are
+// rejected so unit confusion (ops thinks seconds, enters 10) doesn't poll
+// every 10ms against the VerifyEnvironmentSecretAuth DB path.
+//
+// The object-level refines require at least one at-capacity liveness
+// mechanism enabled: heartbeat OR the relevant poll interval. Without this,
+// the hb=0, atCapMs=0 drift config (ops disables heartbeat without
+// restoring at_capacity) falls through every throttle site with no sleep —
+// tight-looping /poll at HTTP-round-trip speed.
+const zeroOrAtLeast100 = {
+  message: 'must be 0 (disabled) or ≥100ms',
+}
+const pollIntervalConfigSchema = lazySchema(() =>
+  z
+    .object({
+      poll_interval_ms_not_at_capacity: z.number().int().min(100),
+      // 0 = no at-capacity polling. Independent of heartbeat — both can be
+      // enabled (heartbeat runs, periodically breaks out to poll).
+      poll_interval_ms_at_capacity: z
+        .number()
+        .int()
+        .refine(v => v === 0 || v >= 100, zeroOrAtLeast100),
+      // 0 = disabled; positive value = heartbeat at this interval while at
+      // capacity. Runs alongside at-capacity polling, not instead of it.
+      // Named non_exclusive to distinguish from the old heartbeat_interval_ms
+      // (either-or semantics in pre-#22145 clients). .default(0) so existing
+      // GrowthBook configs without this field parse successfully.
+      non_exclusive_heartbeat_interval_ms: z.number().int().min(0).default(0),
+      // Multisession (bridgeMain.ts) intervals. Defaults match the
+      // single-session values so existing configs without these fields
+      // preserve current behavior.
+      multisession_poll_interval_ms_not_at_capacity: z
+        .number()
+        .int()
+        .min(100)
+        .default(
+          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_not_at_capacity,
+        ),
+      multisession_poll_interval_ms_partial_capacity: z
+        .number()
+        .int()
+        .min(100)
+        .default(
+          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_partial_capacity,
+        ),
+      multisession_poll_interval_ms_at_capacity: z
+        .number()
+        .int()
+        .refine(v => v === 0 || v >= 100, zeroOrAtLeast100)
+        .default(DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_at_capacity),
+      // .min(1) matches the server's ge=1 constraint (work_v1.py:230).
+      reclaim_older_than_ms: z.number().int().min(1).default(5000),
+      session_keepalive_interval_v2_ms: z
+        .number()
+        .int()
+        .min(0)
+        .default(120_000),
+    })
+    .refine(
+      cfg =>
+        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
+        cfg.poll_interval_ms_at_capacity > 0,
+      {
+        message:
+          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or poll_interval_ms_at_capacity > 0',
+      },
+    )
+    .refine(
+      cfg =>
+        cfg.non_exclusive_heartbeat_interval_ms > 0 ||
+        cfg.multisession_poll_interval_ms_at_capacity > 0,
+      {
+        message:
+          'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or multisession_poll_interval_ms_at_capacity > 0',
+      },
+    ),
+)
+
+/**
+ * Fetch the bridge poll interval config from GrowthBook with a 5-minute
+ * refresh window. Validates the served JSON against the schema; falls back
+ * to defaults if the flag is absent, malformed, or partially-specified.
+ *
+ * Shared by bridgeMain.ts (standalone) and replBridge.ts (REPL) so ops
+ * can tune both poll rates fleet-wide with a single config push.
+ */
+export function getPollIntervalConfig(): PollIntervalConfig {
+  const raw = getFeatureValue_CACHED_WITH_REFRESH<unknown>(
+    'tengu_bridge_poll_interval_config',
+    DEFAULT_POLL_CONFIG,
+    5 * 60 * 1000,
+  )
+  const parsed = pollIntervalConfigSchema().safeParse(raw)
+  return parsed.success ? parsed.data : DEFAULT_POLL_CONFIG
+}

+ 82 - 0
src/bridge/pollConfigDefaults.ts

@@ -0,0 +1,82 @@
+/**
+ * Bridge poll interval defaults. Extracted from pollConfig.ts so callers
+ * that don't need live GrowthBook tuning (daemon via Agent SDK) can avoid
+ * the growthbook.ts → config.ts → file.ts → sessionStorage.ts → commands.ts
+ * transitive dependency chain.
+ */
+
+/**
+ * Poll interval when actively seeking work (no transport / below maxSessions).
+ * Governs user-visible "connecting…" latency on initial work pickup and
+ * recovery speed after the server re-dispatches a work item.
+ */
+const POLL_INTERVAL_MS_NOT_AT_CAPACITY = 2000
+
+/**
+ * Poll interval when the transport is connected. Runs independently of
+ * heartbeat — when both are enabled, the heartbeat loop breaks out to poll
+ * at this interval. Set to 0 to disable at-capacity polling entirely.
+ *
+ * Server-side constraints that bound this value:
+ * - BRIDGE_LAST_POLL_TTL = 4h (Redis key expiry → environment auto-archived)
+ * - max_poll_stale_seconds = 24h (session-creation health gate, currently disabled)
+ *
+ * 10 minutes gives 24× headroom on the Redis TTL while still picking up
+ * server-initiated token-rotation redispatches within one poll cycle.
+ * The transport auto-reconnects internally for 10 minutes on transient WS
+ * failures, so poll is not the recovery path — it's strictly a liveness
+ * signal plus a backstop for permanent close.
+ */
+const POLL_INTERVAL_MS_AT_CAPACITY = 600_000
+
+/**
+ * Multisession bridge (bridgeMain.ts) poll intervals. Defaults match the
+ * single-session values so existing GrowthBook configs without these fields
+ * preserve current behavior. Ops can tune these independently via the
+ * tengu_bridge_poll_interval_config GB flag.
+ */
+const MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY =
+  POLL_INTERVAL_MS_NOT_AT_CAPACITY
+const MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY =
+  POLL_INTERVAL_MS_NOT_AT_CAPACITY
+const MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY = POLL_INTERVAL_MS_AT_CAPACITY
+
+export type PollIntervalConfig = {
+  poll_interval_ms_not_at_capacity: number
+  poll_interval_ms_at_capacity: number
+  non_exclusive_heartbeat_interval_ms: number
+  multisession_poll_interval_ms_not_at_capacity: number
+  multisession_poll_interval_ms_partial_capacity: number
+  multisession_poll_interval_ms_at_capacity: number
+  reclaim_older_than_ms: number
+  session_keepalive_interval_v2_ms: number
+}
+
+export const DEFAULT_POLL_CONFIG: PollIntervalConfig = {
+  poll_interval_ms_not_at_capacity: POLL_INTERVAL_MS_NOT_AT_CAPACITY,
+  poll_interval_ms_at_capacity: POLL_INTERVAL_MS_AT_CAPACITY,
+  // 0 = disabled. When > 0, at-capacity loops send per-work-item heartbeats
+  // at this interval. Independent of poll_interval_ms_at_capacity — both may
+  // run (heartbeat periodically yields to poll). 60s gives 5× headroom under
+  // the server's 300s heartbeat TTL. Named non_exclusive to distinguish from
+  // the old heartbeat_interval_ms field (either-or semantics in pre-#22145
+  // clients — heartbeat suppressed poll). Old clients ignore this key; ops
+  // can set both fields during rollout.
+  non_exclusive_heartbeat_interval_ms: 0,
+  multisession_poll_interval_ms_not_at_capacity:
+    MULTISESSION_POLL_INTERVAL_MS_NOT_AT_CAPACITY,
+  multisession_poll_interval_ms_partial_capacity:
+    MULTISESSION_POLL_INTERVAL_MS_PARTIAL_CAPACITY,
+  multisession_poll_interval_ms_at_capacity:
+    MULTISESSION_POLL_INTERVAL_MS_AT_CAPACITY,
+  // Poll query param: reclaim unacknowledged work items older than this.
+  // Matches the server's DEFAULT_RECLAIM_OLDER_THAN_MS (work_service.py:24).
+  // Enables picking up stale-pending work after JWT expiry, when the prior
+  // ack failed because the session_ingress_token was already stale.
+  reclaim_older_than_ms: 5000,
+  // 0 = disabled. When > 0, push a silent {type:'keep_alive'} frame to
+  // session-ingress at this interval so upstream proxies don't GC an idle
+  // remote-control session. 2 min is the default. _v2: bridge-only gate
+  // (pre-v2 clients read the old key, new clients ignore it).
+  session_keepalive_interval_v2_ms: 120_000,
+}

+ 1008 - 0
src/bridge/remoteBridgeCore.ts

@@ -0,0 +1,1008 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+/**
+ * Env-less Remote Control bridge core.
+ *
+ * "Env-less" = no Environments API layer. Distinct from "CCR v2" (the
+ * /worker/* transport protocol) — the env-based path (replBridge.ts) can also
+ * use CCR v2 transport via CLAUDE_CODE_USE_CCR_V2. This file is about removing
+ * the poll/dispatch layer, not about which transport protocol is underneath.
+ *
+ * Unlike initBridgeCore (env-based, ~2400 lines), this connects directly
+ * to the session-ingress layer without the Environments API work-dispatch
+ * layer:
+ *
+ *   1. POST /v1/code/sessions              (OAuth, no env_id)  → session.id
+ *   2. POST /v1/code/sessions/{id}/bridge  (OAuth)             → {worker_jwt, expires_in, api_base_url, worker_epoch}
+ *      Each /bridge call bumps epoch — it IS the register. No separate /worker/register.
+ *   3. createV2ReplTransport(worker_jwt, worker_epoch)         → SSE + CCRClient
+ *   4. createTokenRefreshScheduler                             → proactive /bridge re-call (new JWT + new epoch)
+ *   5. 401 on SSE → rebuild transport with fresh /bridge credentials (same seq-num)
+ *
+ * No register/poll/ack/stop/heartbeat/deregister environment lifecycle.
+ * The Environments API historically existed because CCR's /worker/*
+ * endpoints required a session_id+role=worker JWT that only the work-dispatch
+ * layer could mint. Server PR #292605 (renamed in #293280) adds the /bridge endpoint as a direct
+ * OAuth→worker_jwt exchange, making the env layer optional for REPL sessions.
+ *
+ * Gated by `tengu_bridge_repl_v2` GrowthBook flag in initReplBridge.ts.
+ * REPL-only — daemon/print stay on env-based.
+ */
+
+import { feature } from 'bun:bundle'
+import axios from 'axios'
+import {
+  createV2ReplTransport,
+  type ReplBridgeTransport,
+} from './replBridgeTransport.js'
+import { buildCCRv2SdkUrl } from './workSecret.js'
+import { toCompatSessionId } from './sessionIdCompat.js'
+import { FlushGate } from './flushGate.js'
+import { createTokenRefreshScheduler } from './jwtUtils.js'
+import { getTrustedDeviceToken } from './trustedDevice.js'
+import {
+  getEnvLessBridgeConfig,
+  type EnvLessBridgeConfig,
+} from './envLessBridgeConfig.js'
+import {
+  handleIngressMessage,
+  handleServerControlRequest,
+  makeResultMessage,
+  isEligibleBridgeMessage,
+  extractTitleText,
+  BoundedUUIDSet,
+} from './bridgeMessaging.js'
+import { logBridgeSkip } from './debugUtils.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { isInProtectedNamespace } from '../utils/envUtils.js'
+import { errorMessage } from '../utils/errors.js'
+import { sleep } from '../utils/sleep.js'
+import { registerCleanup } from '../utils/cleanupRegistry.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import type { ReplBridgeHandle, BridgeState } from './replBridge.js'
+import type { Message } from '../types/message.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+} from '../entrypoints/sdk/controlTypes.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+
+const ANTHROPIC_VERSION = '2023-06-01'
+
+// Telemetry discriminator for ws_connected. 'initial' is the default and
+// never passed to rebuildTransport (which can only be called post-init);
+// Exclude<> makes that constraint explicit at both signatures.
+type ConnectCause = 'initial' | 'proactive_refresh' | 'auth_401_recovery'
+
+function oauthHeaders(accessToken: string): Record<string, string> {
+  return {
+    Authorization: `Bearer ${accessToken}`,
+    'Content-Type': 'application/json',
+    'anthropic-version': ANTHROPIC_VERSION,
+  }
+}
+
+export type EnvLessBridgeParams = {
+  /** Anthropic API origin used for session create / credential fetch / archive calls. */
+  baseUrl: string
+  /** Organization UUID — forwarded to the teardown/cleanup archive calls. */
+  orgUUID: string
+  /** Initial session title sent with POST /v1/code/sessions. */
+  title: string
+  /**
+   * Current OAuth access token, or undefined when unavailable. NOTE: may
+   * return an already-expired token — truthiness does not imply validity
+   * (callers here refresh unconditionally before credential fetches).
+   */
+  getAccessToken: () => string | undefined
+  /**
+   * Forces an OAuth refresh, given the possibly-stale current token (used
+   * for keychain comparison). The resolved boolean is ignored everywhere in
+   * this module — TODO confirm the intended contract with the provider.
+   */
+  onAuth401?: (staleAccessToken: string) => Promise<boolean>
+  /**
+   * Converts internal Message[] → SDKMessage[] for writeMessages() and the
+   * initial-flush/drain paths. Injected rather than imported — mappers.ts
+   * transitively pulls in src/commands.ts (entire command registry + React
+   * tree) which would bloat bundles that don't already have it.
+   */
+  toSDKMessages: (messages: Message[]) => SDKMessage[]
+  /** Max messages in the initial history flush; values <= 0 disable the cap. */
+  initialHistoryCap: number
+  /** Prior conversation to flush on first connect; UUIDs also seed echo-dedup. */
+  initialMessages?: Message[]
+  /** Invoked for inbound messages arriving on the read stream (via handleIngressMessage). */
+  onInboundMessage?: (msg: SDKMessage) => void | Promise<void>
+  /**
+   * Fired on each title-worthy user message seen in writeMessages() until
+   * the callback returns true (done). Mirrors replBridge.ts's onUserMessage —
+   * caller derives a title and PATCHes /v1/sessions/{id} so auto-started
+   * sessions don't stay at the generic fallback. The caller owns the
+   * derive-at-count-1-and-3 policy; the transport just keeps calling until
+   * told to stop. sessionId is the raw cse_* — updateBridgeSessionTitle
+   * retags internally.
+   */
+  onUserMessage?: (text: string, sessionId: string) => boolean
+  /** Remote answer to a permission prompt; delivery also flips worker state to 'running'. */
+  onPermissionResponse?: (response: SDKControlResponse) => void
+  /** Server-initiated interrupt (routed through handleServerControlRequest). */
+  onInterrupt?: () => void
+  /** Server-initiated model change (routed through handleServerControlRequest). */
+  onSetModel?: (model: string | undefined) => void
+  /** Server-initiated max-thinking-tokens change (routed through handleServerControlRequest). */
+  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
+  /** Server-initiated permission-mode change; implementations may reject with an error string. */
+  onSetPermissionMode?: (
+    mode: PermissionMode,
+  ) => { ok: true } | { ok: false; error: string }
+  /** Lifecycle notifications; this module emits 'ready', 'connected', 'reconnecting', 'failed'. */
+  onStateChange?: (state: BridgeState, detail?: string) => void
+  /**
+   * When true, skip opening the SSE read stream — only the CCRClient write
+   * path is activated. Threaded to createV2ReplTransport and
+   * handleServerControlRequest.
+   */
+  outboundOnly?: boolean
+  /** Free-form tags for session categorization (e.g. ['ccr-mirror']). */
+  tags?: string[]
+}
+
+/**
+ * Create a session, fetch a worker JWT, connect the v2 transport.
+ *
+ * Returns null on any pre-flight failure (session create failed, /bridge
+ * failed, transport setup failed). Caller (initReplBridge) surfaces this
+ * as a generic "initialization failed" state.
+ */
+export async function initEnvLessBridgeCore(
+  params: EnvLessBridgeParams,
+): Promise<ReplBridgeHandle | null> {
+  const {
+    baseUrl,
+    orgUUID,
+    title,
+    getAccessToken,
+    onAuth401,
+    toSDKMessages,
+    initialHistoryCap,
+    initialMessages,
+    onInboundMessage,
+    onUserMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    outboundOnly,
+    tags,
+  } = params
+
+  const cfg = await getEnvLessBridgeConfig()
+
+  // ── 1. Create session (POST /v1/code/sessions, no env_id) ───────────────
+  const accessToken = getAccessToken()
+  if (!accessToken) {
+    logForDebugging('[remote-bridge] No OAuth token')
+    return null
+  }
+
+  const createdSessionId = await withRetry(
+    () =>
+      createCodeSession(baseUrl, accessToken, title, cfg.http_timeout_ms, tags),
+    'createCodeSession',
+    cfg,
+  )
+  if (!createdSessionId) {
+    onStateChange?.('failed', 'Session creation failed — see debug log')
+    logBridgeSkip('v2_session_create_failed', undefined, true)
+    return null
+  }
+  const sessionId: string = createdSessionId
+  logForDebugging(`[remote-bridge] Created session ${sessionId}`)
+  logForDiagnosticsNoPII('info', 'bridge_repl_v2_session_created')
+
+  // ── 2. Fetch bridge credentials (POST /bridge → worker_jwt, expires_in, api_base_url) ──
+  const credentials = await withRetry(
+    () =>
+      fetchRemoteCredentials(
+        sessionId,
+        baseUrl,
+        accessToken,
+        cfg.http_timeout_ms,
+      ),
+    'fetchRemoteCredentials',
+    cfg,
+  )
+  if (!credentials) {
+    onStateChange?.('failed', 'Remote credentials fetch failed — see debug log')
+    logBridgeSkip('v2_remote_creds_failed', undefined, true)
+    void archiveSession(
+      sessionId,
+      baseUrl,
+      accessToken,
+      orgUUID,
+      cfg.http_timeout_ms,
+    )
+    return null
+  }
+  logForDebugging(
+    `[remote-bridge] Fetched bridge credentials (expires_in=${credentials.expires_in}s)`,
+  )
+
+  // ── 3. Build v2 transport (SSETransport + CCRClient) ────────────────────
+  const sessionUrl = buildCCRv2SdkUrl(credentials.api_base_url, sessionId)
+  logForDebugging(`[remote-bridge] v2 session URL: ${sessionUrl}`)
+
+  let transport: ReplBridgeTransport
+  try {
+    transport = await createV2ReplTransport({
+      sessionUrl,
+      ingressToken: credentials.worker_jwt,
+      sessionId,
+      epoch: credentials.worker_epoch,
+      heartbeatIntervalMs: cfg.heartbeat_interval_ms,
+      heartbeatJitterFraction: cfg.heartbeat_jitter_fraction,
+      // Per-instance closure — keeps the worker JWT out of
+      // process.env.CLAUDE_CODE_SESSION_ACCESS_TOKEN, which mcp/client.ts
+      // reads ungatedly and would otherwise send to user-configured ws/http
+      // MCP servers. Frozen-at-construction is correct: transport is fully
+      // rebuilt on refresh (rebuildTransport below).
+      getAuthToken: () => credentials.worker_jwt,
+      outboundOnly,
+    })
+  } catch (err) {
+    logForDebugging(
+      `[remote-bridge] v2 transport setup failed: ${errorMessage(err)}`,
+      { level: 'error' },
+    )
+    onStateChange?.('failed', `Transport setup failed: ${errorMessage(err)}`)
+    logBridgeSkip('v2_transport_setup_failed', undefined, true)
+    void archiveSession(
+      sessionId,
+      baseUrl,
+      accessToken,
+      orgUUID,
+      cfg.http_timeout_ms,
+    )
+    return null
+  }
+  logForDebugging(
+    `[remote-bridge] v2 transport created (epoch=${credentials.worker_epoch})`,
+  )
+  onStateChange?.('ready')
+
+  // ── 4. State ────────────────────────────────────────────────────────────
+
+  // Echo dedup: messages we POST come back on the read stream. Seeded with
+  // initial message UUIDs so server echoes of flushed history are recognized.
+  // Both sets cover initial UUIDs — recentPostedUUIDs is a 2000-cap ring buffer
+  // and could evict them after enough live writes; initialMessageUUIDs is the
+  // unbounded fallback. Defense-in-depth; mirrors replBridge.ts.
+  const recentPostedUUIDs = new BoundedUUIDSet(cfg.uuid_dedup_buffer_size)
+  const initialMessageUUIDs = new Set<string>()
+  if (initialMessages) {
+    for (const msg of initialMessages) {
+      initialMessageUUIDs.add(msg.uuid)
+      recentPostedUUIDs.add(msg.uuid)
+    }
+  }
+
+  // Defensive dedup for re-delivered inbound prompts (seq-num negotiation
+  // edge cases, server history replay after transport swap).
+  const recentInboundUUIDs = new BoundedUUIDSet(cfg.uuid_dedup_buffer_size)
+
+  // FlushGate: queue live writes while the history flush POST is in flight,
+  // so the server receives [history..., live...] in order.
+  const flushGate = new FlushGate<Message>()
+
+  let initialFlushDone = false
+  let tornDown = false
+  let authRecoveryInFlight = false
+  // Latch for onUserMessage — flips true when the callback returns true
+  // (policy says "done deriving"). sessionId is const (no re-create path —
+  // rebuildTransport swaps JWT/epoch, same session), so no reset needed.
+  let userMessageCallbackDone = !onUserMessage
+
+  // Telemetry: why did onConnect fire? Set by rebuildTransport before
+  // wireTransportCallbacks; read asynchronously by onConnect. Race-safe
+  // because authRecoveryInFlight serializes rebuild callers, and a fresh
+  // initEnvLessBridgeCore() call gets a fresh closure defaulting to 'initial'.
+  let connectCause: ConnectCause = 'initial'
+
+  // Deadline for onConnect after transport.connect(). Cleared by onConnect
+  // (connected) and onClose (got a close — not silent). If neither fires
+  // before cfg.connect_timeout_ms, onConnectTimeout emits — the only
+  // signal for the `started → (silence)` gap.
+  let connectDeadline: ReturnType<typeof setTimeout> | undefined
+  // Telemetry-only: records the silent-connect gap; does not retry, rebuild
+  // the transport, or change bridge state.
+  function onConnectTimeout(cause: ConnectCause): void {
+    if (tornDown) return
+    logEvent('tengu_bridge_repl_connect_timeout', {
+      v2: true,
+      elapsed_ms: cfg.connect_timeout_ms,
+      cause:
+        cause as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+  }
+
+  // ── 5. JWT refresh scheduler ────────────────────────────────────────────
+  // Schedule a callback 5min before expiry (per response.expires_in). On fire,
+  // re-fetch /bridge with OAuth → rebuild transport with fresh credentials.
+  // Each /bridge call bumps epoch server-side, so a JWT-only swap would leave
+  // the old CCRClient heartbeating with a stale epoch → 409 within 20s.
+  // JWT is opaque — do not decode.
+  const refresh = createTokenRefreshScheduler({
+    refreshBufferMs: cfg.token_refresh_buffer_ms,
+    getAccessToken: async () => {
+      // Unconditionally refresh OAuth before calling /bridge — getAccessToken()
+      // returns expired tokens as non-null strings (doesn't check expiresAt),
+      // so truthiness doesn't mean valid. Pass the stale token to onAuth401
+      // so handleOAuth401Error's keychain-comparison can detect parallel refresh.
+      const stale = getAccessToken()
+      if (onAuth401) await onAuth401(stale ?? '')
+      return getAccessToken() ?? stale
+    },
+    onRefresh: (sid, oauthToken) => {
+      void (async () => {
+        // Laptop wake: overdue proactive timer + SSE 401 fire ~simultaneously.
+        // Claim the flag BEFORE the /bridge fetch so the other path skips
+        // entirely — prevents double epoch bump (each /bridge call bumps; if
+        // both fetch, the first rebuild gets a stale epoch and 409s).
+        if (authRecoveryInFlight || tornDown) {
+          logForDebugging(
+            '[remote-bridge] Recovery already in flight, skipping proactive refresh',
+          )
+          return
+        }
+        authRecoveryInFlight = true
+        try {
+          const fresh = await withRetry(
+            () =>
+              fetchRemoteCredentials(
+                sid,
+                baseUrl,
+                oauthToken,
+                cfg.http_timeout_ms,
+              ),
+            'fetchRemoteCredentials (proactive)',
+            cfg,
+          )
+          if (!fresh || tornDown) return
+          await rebuildTransport(fresh, 'proactive_refresh')
+          logForDebugging(
+            '[remote-bridge] Transport rebuilt (proactive refresh)',
+          )
+        } catch (err) {
+          logForDebugging(
+            `[remote-bridge] Proactive refresh rebuild failed: ${errorMessage(err)}`,
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII(
+            'error',
+            'bridge_repl_v2_proactive_refresh_failed',
+          )
+          if (!tornDown) {
+            onStateChange?.('failed', `Refresh failed: ${errorMessage(err)}`)
+          }
+        } finally {
+          authRecoveryInFlight = false
+        }
+      })()
+    },
+    label: 'remote',
+  })
+  refresh.scheduleFromExpiresIn(sessionId, credentials.expires_in)
+
+  // ── 6. Wire callbacks (extracted so transport-rebuild can re-wire) ──────
+  // Invoked once at startup and again after every rebuildTransport: the
+  // callbacks close over the mutable `transport` binding, so each new
+  // transport instance needs them re-attached.
+  function wireTransportCallbacks(): void {
+    transport.setOnConnect(() => {
+      clearTimeout(connectDeadline)
+      logForDebugging('[remote-bridge] v2 transport connected')
+      logForDiagnosticsNoPII('info', 'bridge_repl_v2_transport_connected')
+      logEvent('tengu_bridge_repl_ws_connected', {
+        v2: true,
+        cause:
+          connectCause as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+
+      if (!initialFlushDone && initialMessages && initialMessages.length > 0) {
+        initialFlushDone = true
+        // Capture current transport — if 401/teardown happens mid-flush,
+        // the stale .finally() must not drain the gate or signal connected.
+        // (Same guard pattern as replBridge.ts:1119.)
+        const flushTransport = transport
+        void flushHistory(initialMessages)
+          .catch(e =>
+            logForDebugging(`[remote-bridge] flushHistory failed: ${e}`),
+          )
+          .finally(() => {
+            // authRecoveryInFlight catches the v1-vs-v2 asymmetry: v1 nulls
+            // transport synchronously in setOnClose (replBridge.ts:1175), so
+            // transport !== flushTransport trips immediately. v2 doesn't null —
+            // transport reassigned only at rebuildTransport:346, 3 awaits deep.
+            // authRecoveryInFlight is set synchronously at rebuildTransport entry.
+            if (
+              transport !== flushTransport ||
+              tornDown ||
+              authRecoveryInFlight
+            ) {
+              return
+            }
+            drainFlushGate()
+            onStateChange?.('connected')
+          })
+      } else if (!flushGate.active) {
+        onStateChange?.('connected')
+      }
+    })
+
+    transport.setOnData((data: string) => {
+      handleIngressMessage(
+        data,
+        recentPostedUUIDs,
+        recentInboundUUIDs,
+        onInboundMessage,
+        // Remote client answered the permission prompt — the turn resumes.
+        // Without this the server stays on requires_action until the next
+        // user message or turn-end result.
+        onPermissionResponse
+          ? res => {
+              transport.reportState('running')
+              onPermissionResponse(res)
+            }
+          : undefined,
+        req =>
+          handleServerControlRequest(req, {
+            transport,
+            sessionId,
+            onInterrupt,
+            onSetModel,
+            onSetMaxThinkingTokens,
+            onSetPermissionMode,
+            outboundOnly,
+          }),
+      )
+    })
+
+    transport.setOnClose((code?: number) => {
+      clearTimeout(connectDeadline)
+      if (tornDown) return
+      logForDebugging(`[remote-bridge] v2 transport closed (code=${code})`)
+      logEvent('tengu_bridge_repl_ws_closed', { code, v2: true })
+      // onClose fires only for TERMINAL failures: 401 (JWT invalid),
+      // 4090 (CCR epoch mismatch), 4091 (CCR init failed), or SSE 10-min
+      // reconnect budget exhausted. Transient disconnects are handled
+      // transparently inside SSETransport. 401 we can recover from (fetch
+      // fresh JWT, rebuild transport); all other codes are dead-ends.
+      if (code === 401 && !authRecoveryInFlight) {
+        void recoverFromAuthFailure()
+        return
+      }
+      onStateChange?.('failed', `Transport closed (code ${code})`)
+    })
+  }
+
+  // ── 7. Transport rebuild (shared by proactive refresh + 401 recovery) ──
+  // Every /bridge call bumps epoch server-side. Both refresh paths must
+  // rebuild the transport with the new epoch — a JWT-only swap leaves the
+  // old CCRClient heartbeating stale epoch → 409. SSE resumes from the old
+  // transport's high-water-mark seq-num so no server-side replay.
+  // Caller MUST set authRecoveryInFlight = true before calling (synchronously,
+  // before any await) and clear it in a finally. This function doesn't manage
+  // the flag — moving it here would be too late to prevent a double /bridge
+  // fetch, and each fetch bumps epoch.
+  // Besides swapping the transport, this also re-arms the connect deadline
+  // and reschedules the proactive JWT refresh from the new expires_in.
+  async function rebuildTransport(
+    fresh: RemoteCredentials,
+    cause: Exclude<ConnectCause, 'initial'>,
+  ): Promise<void> {
+    connectCause = cause
+    // Queue writes during rebuild — once /bridge returns, the old transport's
+    // epoch is stale and its next write/heartbeat 409s. Without this gate,
+    // writeMessages adds UUIDs to recentPostedUUIDs then writeBatch silently
+    // no-ops (closed uploader after 409) → permanent silent message loss.
+    flushGate.start()
+    try {
+      const seq = transport.getLastSequenceNum()
+      transport.close()
+      transport = await createV2ReplTransport({
+        sessionUrl: buildCCRv2SdkUrl(fresh.api_base_url, sessionId),
+        ingressToken: fresh.worker_jwt,
+        sessionId,
+        epoch: fresh.worker_epoch,
+        heartbeatIntervalMs: cfg.heartbeat_interval_ms,
+        heartbeatJitterFraction: cfg.heartbeat_jitter_fraction,
+        initialSequenceNum: seq,
+        getAuthToken: () => fresh.worker_jwt,
+        outboundOnly,
+      })
+      if (tornDown) {
+        // Teardown fired during the async createV2ReplTransport window.
+        // Don't wire/connect/schedule — we'd re-arm timers after cancelAll()
+        // and fire onInboundMessage into a torn-down bridge.
+        transport.close()
+        return
+      }
+      wireTransportCallbacks()
+      transport.connect()
+      connectDeadline = setTimeout(
+        onConnectTimeout,
+        cfg.connect_timeout_ms,
+        connectCause,
+      )
+      refresh.scheduleFromExpiresIn(sessionId, fresh.expires_in)
+      // Drain queued writes into the new uploader. Runs before
+      // ccr.initialize() resolves (transport.connect() is fire-and-forget),
+      // but the uploader serializes behind the initial PUT /worker. If
+      // init fails (4091), events drop — but only recentPostedUUIDs
+      // (per-instance) is populated, so re-enabling the bridge re-flushes.
+      drainFlushGate()
+    } finally {
+      // End the gate on failure paths too — drainFlushGate already ended
+      // it on success. Queued messages are dropped (transport still dead).
+      flushGate.drop()
+    }
+  }
+
+  // ── 8. 401 recovery (OAuth refresh + rebuild) ───────────────────────────
+  // Never throws: every failure path is logged and surfaced to the UI via
+  // onStateChange('failed', ...).
+  async function recoverFromAuthFailure(): Promise<void> {
+    // setOnClose already guards `!authRecoveryInFlight` but that check and
+    // this set must be atomic against onRefresh — claim synchronously before
+    // any await. Laptop wake fires both paths ~simultaneously.
+    if (authRecoveryInFlight) return
+    authRecoveryInFlight = true
+    onStateChange?.('reconnecting', 'JWT expired — refreshing')
+    logForDebugging('[remote-bridge] 401 on SSE — attempting JWT refresh')
+    try {
+      // Unconditionally try OAuth refresh — getAccessToken() returns expired
+      // tokens as non-null strings, so !oauthToken doesn't catch expiry.
+      // Pass the stale token so handleOAuth401Error's keychain-comparison
+      // can detect if another tab already refreshed.
+      const stale = getAccessToken()
+      if (onAuth401) await onAuth401(stale ?? '')
+      const oauthToken = getAccessToken() ?? stale
+      if (!oauthToken || tornDown) {
+        if (!tornDown) {
+          onStateChange?.('failed', 'JWT refresh failed: no OAuth token')
+        }
+        return
+      }
+
+      const fresh = await withRetry(
+        () =>
+          fetchRemoteCredentials(
+            sessionId,
+            baseUrl,
+            oauthToken,
+            cfg.http_timeout_ms,
+          ),
+        'fetchRemoteCredentials (recovery)',
+        cfg,
+      )
+      if (!fresh || tornDown) {
+        if (!tornDown) {
+          onStateChange?.('failed', 'JWT refresh failed after 401')
+        }
+        return
+      }
+      // If 401 interrupted the initial flush, writeBatch may have silently
+      // no-op'd on the closed uploader (ccr.close() ran in the SSE wrapper
+      // before our setOnClose callback). Reset so the new onConnect re-flushes.
+      // (v1 scopes initialFlushDone inside the per-transport closure at
+      // replBridge.ts:1027 so it resets naturally; v2 has it at outer scope.)
+      initialFlushDone = false
+      await rebuildTransport(fresh, 'auth_401_recovery')
+      logForDebugging('[remote-bridge] Transport rebuilt after 401')
+    } catch (err) {
+      logForDebugging(
+        `[remote-bridge] 401 recovery failed: ${errorMessage(err)}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'bridge_repl_v2_jwt_refresh_failed')
+      if (!tornDown) {
+        onStateChange?.('failed', `JWT refresh failed: ${errorMessage(err)}`)
+      }
+    } finally {
+      authRecoveryInFlight = false
+    }
+  }
+
+  wireTransportCallbacks()
+
+  // Start flushGate BEFORE connect so writeMessages() during handshake
+  // queues instead of racing the history POST.
+  if (initialMessages && initialMessages.length > 0) {
+    flushGate.start()
+  }
+  transport.connect()
+  connectDeadline = setTimeout(
+    onConnectTimeout,
+    cfg.connect_timeout_ms,
+    connectCause,
+  )
+
+  // ── 8b. History flush + drain helpers ───────────────────────────────────
+  // Release the FlushGate and send anything queued while it was active.
+  // No-op when nothing was queued; drained user messages flip worker state
+  // to 'running' (turn start), mirroring the live writeMessages path.
+  function drainFlushGate(): void {
+    const msgs = flushGate.end()
+    if (msgs.length === 0) return
+    for (const msg of msgs) recentPostedUUIDs.add(msg.uuid)
+    const events = toSDKMessages(msgs).map(m => ({
+      ...m,
+      session_id: sessionId,
+    }))
+    if (msgs.some(m => m.type === 'user')) {
+      transport.reportState('running')
+    }
+    logForDebugging(
+      `[remote-bridge] Drained ${msgs.length} queued message(s) after flush`,
+    )
+    void transport.writeBatch(events)
+  }
+
+  // Send the (eligibility-filtered, cap-applied) prior conversation to the
+  // new server session as one batch write; resolves when the batch settles.
+  async function flushHistory(msgs: Message[]): Promise<void> {
+    // v2 always creates a fresh server session (unconditional createCodeSession
+    // above) — no session reuse, no double-post risk. Unlike v1, we do NOT
+    // filter by previouslyFlushedUUIDs: that set persists across REPL enable/
+    // disable cycles (useRef), so it would wrongly suppress history on re-enable.
+    const eligible = msgs.filter(isEligibleBridgeMessage)
+    const capped =
+      initialHistoryCap > 0 && eligible.length > initialHistoryCap
+        ? eligible.slice(-initialHistoryCap)
+        : eligible
+    if (capped.length < eligible.length) {
+      logForDebugging(
+        `[remote-bridge] Capped initial flush: ${eligible.length} -> ${capped.length} (cap=${initialHistoryCap})`,
+      )
+    }
+    const events = toSDKMessages(capped).map(m => ({
+      ...m,
+      session_id: sessionId,
+    }))
+    if (events.length === 0) return
+    // Mid-turn init: if Remote Control is enabled while a query is running,
+    // the last eligible message is a user prompt or tool_result (both 'user'
+    // type). Without this the init PUT's 'idle' sticks until the next user-
+    // type message forwards via writeMessages — which for a pure-text turn
+    // is never (only assistant chunks stream post-init). Check eligible (pre-
+    // cap), not capped: the cap may truncate to a user message even when the
+    // actual trailing message is assistant.
+    if (eligible.at(-1)?.type === 'user') {
+      transport.reportState('running')
+    }
+    logForDebugging(`[remote-bridge] Flushing ${events.length} history events`)
+    await transport.writeBatch(events)
+  }
+
+  // ── 9. Teardown ───────────────────────────────────────────────────────────
+  // On SIGINT, SIGTERM, or the /exit command, gracefulShutdown races
+  // runCleanupFunctions() against a 2s cap before forceExit kills the
+  // process. Budget accordingly:
+  //   - archive: teardown_archive_timeout_ms (default 1500, cap 2000)
+  //   - result write: fire-and-forget, archive latency covers the drain
+  //   - 401 retry: only if first archive 401s, shares the same budget
+  // Idempotent: the tornDown latch makes repeat invocations no-ops.
+  async function teardown(): Promise<void> {
+    if (tornDown) return
+    tornDown = true
+    refresh.cancelAll()
+    clearTimeout(connectDeadline)
+    flushGate.drop()
+
+    // Fire the result message before archive — transport.write() only awaits
+    // enqueue (SerialBatchEventUploader resolves once buffered, drain is
+    // async). Archiving before close() gives the uploader's drain loop a
+    // window (typical archive ≈ 100-500ms) to POST the result without an
+    // explicit sleep. close() sets closed=true which interrupts drain at the
+    // next while-check, so close-before-archive drops the result.
+    transport.reportState('idle')
+    void transport.write(makeResultMessage(sessionId))
+
+    let token = getAccessToken()
+    let status = await archiveSession(
+      sessionId,
+      baseUrl,
+      token,
+      orgUUID,
+      cfg.teardown_archive_timeout_ms,
+    )
+
+    // Token is usually fresh (refresh scheduler runs 5min before expiry) but
+    // laptop-wake past the refresh window leaves getAccessToken() returning a
+    // stale string. Retry once on 401 — onAuth401 (= handleOAuth401Error)
+    // clears keychain cache + force-refreshes. No proactive refresh on the
+    // happy path: handleOAuth401Error force-refreshes even valid tokens,
+    // which would waste budget 99% of the time. try/catch mirrors
+    // recoverFromAuthFailure: keychain reads can throw (macOS locked after
+    // wake); an uncaught throw here would skip transport.close + telemetry.
+    if (status === 401 && onAuth401) {
+      try {
+        await onAuth401(token ?? '')
+        token = getAccessToken()
+        status = await archiveSession(
+          sessionId,
+          baseUrl,
+          token,
+          orgUUID,
+          cfg.teardown_archive_timeout_ms,
+        )
+      } catch (err) {
+        logForDebugging(
+          `[remote-bridge] Teardown 401 retry threw: ${errorMessage(err)}`,
+          { level: 'error' },
+        )
+      }
+    }
+
+    transport.close()
+
+    // Map the raw archive outcome onto the coarse telemetry enum.
+    const archiveStatus: ArchiveTelemetryStatus =
+      status === 'no_token'
+        ? 'skipped_no_token'
+        : status === 'timeout' || status === 'error'
+          ? 'network_error'
+          : status >= 500
+            ? 'server_5xx'
+            : status >= 400
+              ? 'server_4xx'
+              : 'ok'
+
+    logForDebugging(`[remote-bridge] Torn down (archive=${status})`)
+    logForDiagnosticsNoPII('info', 'bridge_repl_v2_teardown')
+    logEvent(
+      feature('CCR_MIRROR') && outboundOnly
+        ? 'tengu_ccr_mirror_teardown'
+        : 'tengu_bridge_repl_teardown',
+      {
+        v2: true,
+        archive_status:
+          archiveStatus as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        archive_ok: typeof status === 'number' && status < 400,
+        archive_http_status: typeof status === 'number' ? status : undefined,
+        archive_timeout: status === 'timeout',
+        archive_no_token: status === 'no_token',
+      },
+    )
+  }
+  const unregister = registerCleanup(teardown)
+
+  if (feature('CCR_MIRROR') && outboundOnly) {
+    logEvent('tengu_ccr_mirror_started', {
+      v2: true,
+      expires_in_s: credentials.expires_in,
+    })
+  } else {
+    logEvent('tengu_bridge_repl_started', {
+      has_initial_messages: !!(initialMessages && initialMessages.length > 0),
+      v2: true,
+      expires_in_s: credentials.expires_in,
+      inProtectedNamespace: isInProtectedNamespace(),
+    })
+  }
+
+  // ── 10. Handle ──────────────────────────────────────────────────────────
+  return {
+    bridgeSessionId: sessionId,
+    environmentId: '',
+    sessionIngressUrl: credentials.api_base_url,
+    writeMessages(messages) {
+      const filtered = messages.filter(
+        m =>
+          isEligibleBridgeMessage(m) &&
+          !initialMessageUUIDs.has(m.uuid) &&
+          !recentPostedUUIDs.has(m.uuid),
+      )
+      if (filtered.length === 0) return
+
+      // Fire onUserMessage for title derivation. Scan before the flushGate
+      // check — prompts are title-worthy even if they queue. Keeps calling
+      // on every title-worthy message until the callback returns true; the
+      // caller owns the policy (derive at 1st and 3rd, skip if explicit).
+      if (!userMessageCallbackDone) {
+        for (const m of filtered) {
+          const text = extractTitleText(m)
+          if (text !== undefined && onUserMessage?.(text, sessionId)) {
+            userMessageCallbackDone = true
+            break
+          }
+        }
+      }
+
+      if (flushGate.enqueue(...filtered)) {
+        logForDebugging(
+          `[remote-bridge] Queued ${filtered.length} message(s) during flush`,
+        )
+        return
+      }
+
+      for (const msg of filtered) recentPostedUUIDs.add(msg.uuid)
+      const events = toSDKMessages(filtered).map(m => ({
+        ...m,
+        session_id: sessionId,
+      }))
+      // v2 does not derive worker_status from events server-side (unlike v1
+      // session-ingress session_status_updater.go). Push it from here so the
+      // CCR web session list shows Running instead of stuck on Idle. A user
+      // message in the batch marks turn start. CCRClient.reportState dedupes
+      // consecutive same-state pushes.
+      if (filtered.some(m => m.type === 'user')) {
+        transport.reportState('running')
+      }
+      logForDebugging(`[remote-bridge] Sending ${filtered.length} message(s)`)
+      void transport.writeBatch(events)
+    },
+    writeSdkMessages(messages: SDKMessage[]) {
+      const filtered = messages.filter(
+        m => !m.uuid || !recentPostedUUIDs.has(m.uuid),
+      )
+      if (filtered.length === 0) return
+      for (const msg of filtered) {
+        if (msg.uuid) recentPostedUUIDs.add(msg.uuid)
+      }
+      const events = filtered.map(m => ({ ...m, session_id: sessionId }))
+      void transport.writeBatch(events)
+    },
+    sendControlRequest(request: SDKControlRequest) {
+      if (authRecoveryInFlight) {
+        logForDebugging(
+          `[remote-bridge] Dropping control_request during 401 recovery: ${request.request_id}`,
+        )
+        return
+      }
+      const event = { ...request, session_id: sessionId }
+      if (request.request.subtype === 'can_use_tool') {
+        transport.reportState('requires_action')
+      }
+      void transport.write(event)
+      logForDebugging(
+        `[remote-bridge] Sent control_request request_id=${request.request_id}`,
+      )
+    },
+    sendControlResponse(response: SDKControlResponse) {
+      if (authRecoveryInFlight) {
+        logForDebugging(
+          '[remote-bridge] Dropping control_response during 401 recovery',
+        )
+        return
+      }
+      const event = { ...response, session_id: sessionId }
+      transport.reportState('running')
+      void transport.write(event)
+      logForDebugging('[remote-bridge] Sent control_response')
+    },
+    sendControlCancelRequest(requestId: string) {
+      if (authRecoveryInFlight) {
+        logForDebugging(
+          `[remote-bridge] Dropping control_cancel_request during 401 recovery: ${requestId}`,
+        )
+        return
+      }
+      const event = {
+        type: 'control_cancel_request' as const,
+        request_id: requestId,
+        session_id: sessionId,
+      }
+      // Hook/classifier/channel/recheck resolved the permission locally —
+      // interactiveHandler calls only cancelRequest (no sendResponse) on
+      // those paths, so without this the server stays on requires_action.
+      transport.reportState('running')
+      void transport.write(event)
+      logForDebugging(
+        `[remote-bridge] Sent control_cancel_request request_id=${requestId}`,
+      )
+    },
+    sendResult() {
+      if (authRecoveryInFlight) {
+        logForDebugging('[remote-bridge] Dropping result during 401 recovery')
+        return
+      }
+      transport.reportState('idle')
+      void transport.write(makeResultMessage(sessionId))
+      logForDebugging(`[remote-bridge] Sent result`)
+    },
+    async teardown() {
+      unregister()
+      await teardown()
+    },
+  }
+}
+
+// ─── Session API (v2 /code/sessions, no env) ─────────────────────────────────
+
+/** Retry an async init call with exponential backoff + jitter. */
+async function withRetry<T>(
+  fn: () => Promise<T | null>,
+  label: string,
+  cfg: EnvLessBridgeConfig,
+): Promise<T | null> {
+  const max = cfg.init_retry_max_attempts
+  for (let attempt = 1; attempt <= max; attempt++) {
+    const result = await fn()
+    if (result !== null) return result
+    if (attempt < max) {
+      const base = cfg.init_retry_base_delay_ms * 2 ** (attempt - 1)
+      const jitter =
+        base * cfg.init_retry_jitter_fraction * (2 * Math.random() - 1)
+      const delay = Math.min(base + jitter, cfg.init_retry_max_delay_ms)
+      logForDebugging(
+        `[remote-bridge] ${label} failed (attempt ${attempt}/${max}), retrying in ${Math.round(delay)}ms`,
+      )
+      await sleep(delay)
+    }
+  }
+  return null
+}
+
+// Moved to codeSessionApi.ts so the SDK /bridge subpath can bundle them
+// without pulling in this file's heavy CLI tree (analytics, transport).
+export {
+  createCodeSession,
+  type RemoteCredentials,
+} from './codeSessionApi.js'
+import {
+  createCodeSession,
+  fetchRemoteCredentials as fetchRemoteCredentialsRaw,
+  type RemoteCredentials,
+} from './codeSessionApi.js'
+import { getBridgeBaseUrlOverride } from './bridgeConfig.js'
+
+// CLI-side wrapper around the SDK-safe raw fetch: injects the trusted-device
+// token and honors the CLAUDE_BRIDGE_BASE_URL dev override (both are
+// env/GrowthBook reads that the SDK-facing codeSessionApi.ts export must
+// stay free of).
+export async function fetchRemoteCredentials(
+  sessionId: string,
+  baseUrl: string,
+  accessToken: string,
+  timeoutMs: number,
+): Promise<RemoteCredentials | null> {
+  const fetched = await fetchRemoteCredentialsRaw(
+    sessionId,
+    baseUrl,
+    accessToken,
+    timeoutMs,
+    getTrustedDeviceToken(),
+  )
+  if (!fetched) return null
+  // With the dev override active, pin api_base_url to the overridden base so
+  // subsequent calls keep hitting the same dev endpoint.
+  if (getBridgeBaseUrlOverride()) {
+    return { ...fetched, api_base_url: baseUrl }
+  }
+  return fetched
+}
+
+// Raw outcome of the archive POST: the HTTP status code whenever the server
+// replied, or a categorical for the no-reply cases. 'no_token' is returned
+// before any network I/O is attempted.
+type ArchiveStatus = number | 'timeout' | 'error' | 'no_token'
+
+// Single categorical for BQ `GROUP BY archive_status`. The booleans on
+// _teardown predate this and are redundant with it (except archive_timeout,
+// which distinguishes ECONNABORTED from other network errors — both map to
+// 'network_error' here since the dominant cause in a 1.5s window is timeout).
+type ArchiveTelemetryStatus =
+  | 'ok'
+  | 'skipped_no_token'
+  | 'network_error'
+  | 'server_4xx'
+  | 'server_5xx'
+
+/**
+ * Best-effort archive of a v2 session at teardown.
+ *
+ * @returns The HTTP status for any server response, 'timeout' on
+ *   ECONNABORTED, 'error' on other network failures, or 'no_token' when no
+ *   OAuth access token is available (request is skipped entirely).
+ */
+async function archiveSession(
+  sessionId: string,
+  baseUrl: string,
+  accessToken: string | undefined,
+  orgUUID: string,
+  timeoutMs: number,
+): Promise<ArchiveStatus> {
+  if (!accessToken) return 'no_token'
+  // Archive lives at the compat layer (/v1/sessions/*, not /v1/code/sessions).
+  // compat.parseSessionID only accepts TagSession (session_*), so retag cse_*.
+  // anthropic-beta + x-organization-uuid are required — without them the
+  // compat gateway 404s before reaching the handler.
+  //
+  // Unlike bridgeMain.ts (which caches compatId in sessionCompatIds to keep
+  // in-memory titledSessions/logger keys consistent across a mid-session
+  // gate flip), this compatId is only a server URL path segment — no
+  // in-memory state. Fresh compute matches whatever the server currently
+  // validates: if the gate is OFF, the server has been updated to accept
+  // cse_* and we correctly send it.
+  const compatId = toCompatSessionId(sessionId)
+  const headers = {
+    ...oauthHeaders(accessToken),
+    'anthropic-beta': 'ccr-byoc-2025-07-29',
+    'x-organization-uuid': orgUUID,
+  }
+  try {
+    // validateStatus: () => true — never throw on HTTP status; the raw code
+    // is the return value and the caller maps it to telemetry categories.
+    const response = await axios.post(
+      `${baseUrl}/v1/sessions/${compatId}/archive`,
+      {},
+      { headers, timeout: timeoutMs, validateStatus: () => true },
+    )
+    logForDebugging(
+      `[remote-bridge] Archive ${compatId} status=${response.status}`,
+    )
+    return response.status
+  } catch (err) {
+    logForDebugging(`[remote-bridge] Archive failed: ${errorMessage(err)}`)
+    if (axios.isAxiosError(err) && err.code === 'ECONNABORTED') {
+      return 'timeout'
+    }
+    return 'error'
+  }
+}

+ 2406 - 0
src/bridge/replBridge.ts

@@ -0,0 +1,2406 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+import { randomUUID } from 'crypto'
+import {
+  createBridgeApiClient,
+  BridgeFatalError,
+  isExpiredErrorType,
+  isSuppressible403,
+} from './bridgeApi.js'
+import type { BridgeConfig, BridgeApiClient } from './types.js'
+import { logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import { registerCleanup } from '../utils/cleanupRegistry.js'
+import {
+  handleIngressMessage,
+  handleServerControlRequest,
+  makeResultMessage,
+  isEligibleBridgeMessage,
+  extractTitleText,
+  BoundedUUIDSet,
+} from './bridgeMessaging.js'
+import {
+  decodeWorkSecret,
+  buildSdkUrl,
+  buildCCRv2SdkUrl,
+  sameSessionId,
+} from './workSecret.js'
+import { toCompatSessionId, toInfraSessionId } from './sessionIdCompat.js'
+import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
+import { getTrustedDeviceToken } from './trustedDevice.js'
+import { HybridTransport } from '../cli/transports/HybridTransport.js'
+import {
+  type ReplBridgeTransport,
+  createV1ReplTransport,
+  createV2ReplTransport,
+} from './replBridgeTransport.js'
+import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
+import { isEnvTruthy, isInProtectedNamespace } from '../utils/envUtils.js'
+import { validateBridgeId } from './bridgeApi.js'
+import {
+  describeAxiosError,
+  extractHttpStatus,
+  logBridgeSkip,
+} from './debugUtils.js'
+import type { Message } from '../types/message.js'
+import type { SDKMessage } from '../entrypoints/agentSdkTypes.js'
+import type { PermissionMode } from '../utils/permissions/PermissionMode.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+} from '../entrypoints/sdk/controlTypes.js'
+import { createCapacityWake, type CapacitySignal } from './capacityWake.js'
+import { FlushGate } from './flushGate.js'
+import {
+  DEFAULT_POLL_CONFIG,
+  type PollIntervalConfig,
+} from './pollConfigDefaults.js'
+import { errorMessage } from '../utils/errors.js'
+import { sleep } from '../utils/sleep.js'
+import {
+  wrapApiForFaultInjection,
+  registerBridgeDebugHandle,
+  clearBridgeDebugHandle,
+  injectBridgeFault,
+} from './bridgeDebug.js'
+
+/**
+ * Live handle returned by the REPL bridge init path: writers for message and
+ * control traffic plus async teardown. The concrete behavior (filtering,
+ * dedup, worker-state reporting) lives in the handle object literal built by
+ * the init function — this type only pins the surface.
+ */
+export type ReplBridgeHandle = {
+  bridgeSessionId: string
+  environmentId: string
+  sessionIngressUrl: string
+  /** Forward internal Message[] to the server (impl filters and dedupes). */
+  writeMessages(messages: Message[]): void
+  /** Forward already-converted SDKMessage[] — the daemon-side write path. */
+  writeSdkMessages(messages: SDKMessage[]): void
+  sendControlRequest(request: SDKControlRequest): void
+  sendControlResponse(response: SDKControlResponse): void
+  sendControlCancelRequest(requestId: string): void
+  /** Signal end-of-turn to the server side. */
+  sendResult(): void
+  /** Unregisters cleanup and tears down the transport/session. */
+  teardown(): Promise<void>
+}
+
+/** Coarse bridge lifecycle states surfaced through onStateChange. */
+export type BridgeState = 'ready' | 'connected' | 'reconnecting' | 'failed'
+
+/**
+ * Explicit-param input to initBridgeCore. Everything initReplBridge reads
+ * from bootstrap state (cwd, session ID, git, OAuth) becomes a field here.
+ * A daemon caller (Agent SDK, PR 4) that never runs main.tsx fills these
+ * in itself.
+ */
+export type BridgeCoreParams = {
+  /** Working directory; also where the crash-recovery bridge pointer lives. */
+  dir: string
+  /** Host machine name, sent in BridgeConfig at environment registration. */
+  machineName: string
+  /** Git branch; forwarded to both registration and createSession. */
+  branch: string
+  /** Git remote URL (null allowed); forwarded alongside branch. */
+  gitRepoUrl: string | null
+  /** Initial session title; getCurrentTitle defaults to returning this. */
+  title: string
+  /** API base URL for the bridge HTTP client. */
+  baseUrl: string
+  /** Forwarded verbatim into BridgeConfig at registration. */
+  sessionIngressUrl: string
+  /**
+   * Opaque string sent as metadata.worker_type. Use BridgeWorkerType for
+   * the two CLI-originated values; daemon callers may send any string the
+   * backend recognizes (it's just a filter key on the web side).
+   */
+  workerType: string
+  /** OAuth access-token getter handed to the bridge API client. */
+  getAccessToken: () => string | undefined
+  /**
+   * POST /v1/sessions. Injected because `createSession.ts` lazy-loads
+   * `auth.ts`/`model.ts`/`oauth/client.ts` and `bun --outfile` inlines
+   * dynamic imports — the lazy-load doesn't help, the whole REPL tree ends
+   * up in the Agent SDK bundle.
+   *
+   * REPL wrapper passes `createBridgeSession` from `createSession.ts`.
+   * Daemon wrapper passes `createBridgeSessionLean` from `sessionApi.ts`
+   * (HTTP-only, orgUUID+model supplied by the daemon caller).
+   *
+   * Receives `gitRepoUrl`+`branch` so the REPL wrapper can build the git
+   * source/outcome for claude.ai's session card. Daemon ignores them.
+   */
+  createSession: (opts: {
+    environmentId: string
+    title: string
+    gitRepoUrl: string | null
+    branch: string
+    signal: AbortSignal
+  }) => Promise<string | null>
+  /**
+   * POST /v1/sessions/{id}/archive. Same injection rationale. Best-effort;
+   * the callback MUST NOT throw.
+   */
+  archiveSession: (sessionId: string) => Promise<void>
+  /**
+   * Invoked on reconnect-after-env-lost to refresh the title. REPL wrapper
+   * reads session storage (picks up /rename); daemon returns the static
+   * title. Defaults to () => title.
+   */
+  getCurrentTitle?: () => string
+  /**
+   * Converts internal Message[] → SDKMessage[] for writeMessages() and the
+   * initial-flush/drain paths. REPL wrapper passes the real toSDKMessages
+   * from utils/messages/mappers.ts. Daemon callers that only use
+   * writeSdkMessages() and pass no initialMessages can omit this — those
+   * code paths are unreachable.
+   *
+   * Injected rather than imported because mappers.ts transitively pulls in
+   * src/commands.ts via messages.ts → api.ts → prompts.ts, dragging the
+   * entire command registry + React tree into the Agent SDK bundle.
+   */
+  toSDKMessages?: (messages: Message[]) => SDKMessage[]
+  /**
+   * OAuth 401 refresh handler passed to createBridgeApiClient. REPL wrapper
+   * passes handleOAuth401Error; daemon passes its AuthManager's handler.
+   * Injected because utils/auth.ts transitively pulls in the command
+   * registry via config.ts → file.ts → permissions/filesystem.ts →
+   * sessionStorage.ts → commands.ts.
+   */
+  onAuth401?: (staleAccessToken: string) => Promise<boolean>
+  /**
+   * Poll interval config getter for the work-poll heartbeat loop. REPL
+   * wrapper passes the GrowthBook-backed getPollIntervalConfig (allows ops
+   * to live-tune poll rates fleet-wide). Daemon passes a static config
+   * with a 60s heartbeat (5× headroom under the 300s work-lease TTL).
+   * Injected because growthbook.ts transitively pulls in the command
+   * registry via the same config.ts chain.
+   */
+  getPollIntervalConfig?: () => PollIntervalConfig
+  /**
+   * Max initial messages to replay on connect. REPL wrapper reads from the
+   * tengu_bridge_initial_history_cap GrowthBook flag. Daemon passes no
+   * initialMessages so this is never read. Default 200 matches the flag
+   * default.
+   */
+  initialHistoryCap?: number
+  // Same REPL-flush machinery as InitBridgeOptions — daemon omits these.
+  /** Prior-conversation messages replayed on first connect (capped by initialHistoryCap). */
+  initialMessages?: Message[]
+  /** UUIDs already sent in a prior flush — excluded from the initial replay. */
+  previouslyFlushedUUIDs?: Set<string>
+  /** Fires for each inbound server message forwarded to the REPL. */
+  onInboundMessage?: (msg: SDKMessage) => void
+  // NOTE(review): the four callbacks below are wired to the server control
+  // channel — exact trigger conditions live in the transport handlers, not
+  // in this file's visible portion.
+  onPermissionResponse?: (response: SDKControlResponse) => void
+  onInterrupt?: () => void
+  onSetModel?: (model: string | undefined) => void
+  onSetMaxThinkingTokens?: (maxTokens: number | null) => void
+  /**
+   * Returns a policy verdict so this module can emit an error control_response
+   * without importing the policy checks itself (bootstrap-isolation constraint).
+   * The callback must guard `auto` (isAutoModeGateEnabled) and
+   * `bypassPermissions` (isBypassPermissionsModeDisabled AND
+   * isBypassPermissionsModeAvailable) BEFORE calling transitionPermissionMode —
+   * that function's internal auto-gate check is a defensive throw, not a
+   * graceful guard, and its side-effect order is setAutoModeActive(true) then
+   * throw, which corrupts the 3-way invariant documented in src/CLAUDE.md if
+   * the callback lets the throw escape here.
+   */
+  onSetPermissionMode?: (
+    mode: PermissionMode,
+  ) => { ok: true } | { ok: false; error: string }
+  /** Lifecycle notifications; 'failed' carries a human-readable detail string. */
+  onStateChange?: (state: BridgeState, detail?: string) => void
+  /**
+   * Fires on each real user message to flow through writeMessages() until
+   * the callback returns true (done). Mirrors remoteBridgeCore.ts's
+   * onUserMessage so the REPL bridge can derive a session title from early
+   * prompts when none was set at init time (e.g. user runs /remote-control
+   * on an empty conversation, then types). Tool-result wrappers, meta
+   * messages, and display-tag-only messages are skipped. Receives
+   * currentSessionId so the wrapper can PATCH the title without a closure
+   * dance to reach the not-yet-returned handle. The caller owns the
+   * derive-at-count-1-and-3 policy; the transport just keeps calling until
+   * told to stop. Not fired for the writeSdkMessages daemon path (daemon
+   * sets its own title at init). Distinct from SessionSpawnOpts's
+   * onFirstUserMessage (spawn-bridge, PR #21250), which stays fire-once.
+   */
+  onUserMessage?: (text: string, sessionId: string) => boolean
+  /** See InitBridgeOptions.perpetual. */
+  perpetual?: boolean
+  /**
+   * Seeds lastTransportSequenceNum — the SSE event-stream high-water mark
+   * that's carried across transport swaps within one process. Daemon callers
+   * pass the value they persisted at shutdown so the FIRST SSE connect of a
+   * fresh process sends from_sequence_num and the server doesn't replay full
+   * history. REPL callers omit (fresh session each run → 0 is correct).
+   */
+  initialSSESequenceNum?: number
+}
+
+/**
+ * Superset of ReplBridgeHandle. Adds getSSESequenceNum for daemon callers
+ * that persist the SSE seq-num across process restarts and pass it back as
+ * initialSSESequenceNum on the next start.
+ */
+export type BridgeCoreHandle = ReplBridgeHandle & {
+  /**
+   * Current SSE sequence-number high-water mark. Updates as transports
+   * swap. Daemon callers persist this on shutdown and pass it back as
+   * initialSSESequenceNum on next start.
+   */
+  getSSESequenceNum(): number
+}
+
+/**
+ * Poll error recovery constants. When the work poll starts failing (e.g.
+ * server 500s), we use exponential backoff and give up after this timeout.
+ * This is deliberately long — the server is the authority on when a session
+ * is truly dead. As long as the server accepts our poll, we keep waiting
+ * for it to re-dispatch the work item.
+ */
+const POLL_ERROR_INITIAL_DELAY_MS = 2_000 // 2s first backoff step
+const POLL_ERROR_MAX_DELAY_MS = 60_000 // backoff ceiling: 1 minute
+const POLL_ERROR_GIVE_UP_MS = 15 * 60 * 1000 // abandon after 15 minutes
+
+// Monotonically increasing counter for distinguishing init calls in logs
+let initSequence = 0
+
+/**
+ * Bootstrap-free core: env registration → session creation → poll loop →
+ * ingress WS → teardown. Reads nothing from bootstrap/state or
+ * sessionStorage — all context comes from params. Caller (initReplBridge
+ * below, or a daemon in PR 4) has already passed entitlement gates and
+ * gathered git/auth/title.
+ *
+ * Returns null on registration or session-creation failure.
+ */
+export async function initBridgeCore(
+  params: BridgeCoreParams,
+): Promise<BridgeCoreHandle | null> {
+  const {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    title,
+    baseUrl,
+    sessionIngressUrl,
+    workerType,
+    getAccessToken,
+    createSession,
+    archiveSession,
+    getCurrentTitle = () => title,
+    toSDKMessages = () => {
+      throw new Error(
+        'BridgeCoreParams.toSDKMessages not provided. Pass it if you use writeMessages() or initialMessages — daemon callers that only use writeSdkMessages() never hit this path.',
+      )
+    },
+    onAuth401,
+    getPollIntervalConfig = () => DEFAULT_POLL_CONFIG,
+    initialHistoryCap = 200,
+    initialMessages,
+    previouslyFlushedUUIDs,
+    onInboundMessage,
+    onPermissionResponse,
+    onInterrupt,
+    onSetModel,
+    onSetMaxThinkingTokens,
+    onSetPermissionMode,
+    onStateChange,
+    onUserMessage,
+    perpetual,
+    initialSSESequenceNum = 0,
+  } = params
+
+  const seq = ++initSequence
+
+  // bridgePointer import hoisted: perpetual mode reads it before register;
+  // non-perpetual writes it after session create; both use clear at teardown.
+  const { writeBridgePointer, clearBridgePointer, readBridgePointer } =
+    await import('./bridgePointer.js')
+
+  // Perpetual mode: read the crash-recovery pointer and treat it as prior
+  // state. The pointer is written unconditionally after session create
+  // (crash-recovery for all sessions); perpetual mode just skips the
+  // teardown clear so it survives clean exits too. Only reuse 'repl'
+  // pointers — a crashed standalone bridge (`claude remote-control`)
+  // writes source:'standalone' with a different workerType.
+  const rawPrior = perpetual ? await readBridgePointer(dir) : null
+  const prior = rawPrior?.source === 'repl' ? rawPrior : null
+
+  logForDebugging(
+    `[bridge:repl] initBridgeCore #${seq} starting (initialMessages=${initialMessages?.length ?? 0}${prior ? ` perpetual prior=env:${prior.environmentId}` : ''})`,
+  )
+
+  // 5. Register bridge environment
+  const rawApi = createBridgeApiClient({
+    baseUrl,
+    getAccessToken,
+    runnerVersion: MACRO.VERSION,
+    onDebug: logForDebugging,
+    onAuth401,
+    getTrustedDeviceToken,
+  })
+  // Ant-only: interpose so /bridge-kick can inject poll/register/heartbeat
+  // failures. Zero cost in external builds (rawApi passes through unchanged).
+  const api =
+    process.env.USER_TYPE === 'ant' ? wrapApiForFaultInjection(rawApi) : rawApi
+
+  const bridgeConfig: BridgeConfig = {
+    dir,
+    machineName,
+    branch,
+    gitRepoUrl,
+    maxSessions: 1,
+    spawnMode: 'single-session',
+    verbose: false,
+    sandbox: false,
+    bridgeId: randomUUID(),
+    workerType,
+    environmentId: randomUUID(),
+    reuseEnvironmentId: prior?.environmentId,
+    apiBaseUrl: baseUrl,
+    sessionIngressUrl,
+  }
+
+  let environmentId: string
+  let environmentSecret: string
+  try {
+    const reg = await api.registerBridgeEnvironment(bridgeConfig)
+    environmentId = reg.environment_id
+    environmentSecret = reg.environment_secret
+  } catch (err) {
+    logBridgeSkip(
+      'registration_failed',
+      `[bridge:repl] Environment registration failed: ${errorMessage(err)}`,
+    )
+    // Stale pointer may be the cause (expired/deleted env) — clear it so
+    // the next start doesn't retry the same dead ID.
+    if (prior) {
+      await clearBridgePointer(dir)
+    }
+    onStateChange?.('failed', errorMessage(err))
+    return null
+  }
+
+  logForDebugging(`[bridge:repl] Environment registered: ${environmentId}`)
+  logForDiagnosticsNoPII('info', 'bridge_repl_env_registered')
+  logEvent('tengu_bridge_repl_env_registered', {})
+
+  /**
+   * Reconnect-in-place: if the just-registered environmentId matches what
+   * was requested, call reconnectSession to force-stop stale workers and
+   * re-queue the session. Used at init (perpetual mode — env is alive but
+   * idle after clean teardown) and in doReconnect() Strategy 1 (env lost
+   * then resurrected). Returns true on success; caller falls back to
+   * fresh session creation on false.
+   */
+  async function tryReconnectInPlace(
+    requestedEnvId: string,
+    sessionId: string,
+  ): Promise<boolean> {
+    if (environmentId !== requestedEnvId) {
+      logForDebugging(
+        `[bridge:repl] Env mismatch (requested ${requestedEnvId}, got ${environmentId}) — cannot reconnect in place`,
+      )
+      return false
+    }
+    // The pointer stores what createBridgeSession returned (session_*,
+    // compat/convert.go:41). /bridge/reconnect is an environments-layer
+    // endpoint — once the server's ccr_v2_compat_enabled gate is on it
+    // looks sessions up by their infra tag (cse_*) and returns "Session
+    // not found" for the session_* costume. We don't know the gate state
+    // pre-poll, so try both; the re-tag is a no-op if the ID is already
+    // cse_* (doReconnect Strategy 1 path — currentSessionId never mutates
+    // to cse_* but future-proof the check).
+    const infraId = toInfraSessionId(sessionId)
+    const candidates =
+      infraId === sessionId ? [sessionId] : [sessionId, infraId]
+    for (const id of candidates) {
+      try {
+        await api.reconnectSession(environmentId, id)
+        logForDebugging(
+          `[bridge:repl] Reconnected session ${id} in place on env ${environmentId}`,
+        )
+        return true
+      } catch (err) {
+        logForDebugging(
+          `[bridge:repl] reconnectSession(${id}) failed: ${errorMessage(err)}`,
+        )
+      }
+    }
+    logForDebugging(
+      '[bridge:repl] reconnectSession exhausted — falling through to fresh session',
+    )
+    return false
+  }
+
+  // Perpetual init: env is alive but has no queued work after clean
+  // teardown. reconnectSession re-queues it. doReconnect() has the same
+  // call but only fires on poll 404 (env dead);
+  // here the env is alive but idle.
+  const reusedPriorSession = prior
+    ? await tryReconnectInPlace(prior.environmentId, prior.sessionId)
+    : false
+  if (prior && !reusedPriorSession) {
+    await clearBridgePointer(dir)
+  }
+
+  // 6. Create session on the bridge. Initial messages are NOT included as
+  // session creation events because those use STREAM_ONLY persistence and
+  // are published before the CCR UI subscribes, so they get lost. Instead,
+  // initial messages are flushed via the ingress WebSocket once it connects.
+
+  // Mutable session ID — updated when the environment+session pair is
+  // re-created after a connection loss.
+  let currentSessionId: string
+
+
+  if (reusedPriorSession && prior) {
+    currentSessionId = prior.sessionId
+    logForDebugging(
+      `[bridge:repl] Perpetual session reused: ${currentSessionId}`,
+    )
+    // Server already has all initialMessages from the prior CLI run. Mark
+    // them as previously-flushed so the initial flush filter excludes them
+    // (previouslyFlushedUUIDs is a fresh Set on every CLI start). Duplicate
+    // UUIDs cause the server to kill the WebSocket.
+    if (initialMessages && previouslyFlushedUUIDs) {
+      for (const msg of initialMessages) {
+        previouslyFlushedUUIDs.add(msg.uuid)
+      }
+    }
+  } else {
+    const createdSessionId = await createSession({
+      environmentId,
+      title,
+      gitRepoUrl,
+      branch,
+      signal: AbortSignal.timeout(15_000),
+    })
+
+    if (!createdSessionId) {
+      logForDebugging(
+        '[bridge:repl] Session creation failed, deregistering environment',
+      )
+      logEvent('tengu_bridge_repl_session_failed', {})
+      await api.deregisterEnvironment(environmentId).catch(() => {})
+      onStateChange?.('failed', 'Session creation failed')
+      return null
+    }
+
+    currentSessionId = createdSessionId
+    logForDebugging(`[bridge:repl] Session created: ${currentSessionId}`)
+  }
+
+  // Crash-recovery pointer: written now so a kill -9 at any point after
+  // this leaves a recoverable trail. Cleared in teardown (non-perpetual)
+  // or left alone (perpetual mode — pointer survives clean exit too).
+  // `claude remote-control --continue` from the same directory will detect
+  // it and offer to resume.
+  await writeBridgePointer(dir, {
+    sessionId: currentSessionId,
+    environmentId,
+    source: 'repl',
+  })
+  logForDiagnosticsNoPII('info', 'bridge_repl_session_created')
+  logEvent('tengu_bridge_repl_started', {
+    has_initial_messages: !!(initialMessages && initialMessages.length > 0),
+    inProtectedNamespace: isInProtectedNamespace(),
+  })
+
+  // UUIDs of initial messages. Used for dedup in writeMessages to avoid
+  // re-sending messages that were already flushed on WebSocket open.
+  const initialMessageUUIDs = new Set<string>()
+  if (initialMessages) {
+    for (const msg of initialMessages) {
+      initialMessageUUIDs.add(msg.uuid)
+    }
+  }
+
+  // Bounded ring buffer of UUIDs for messages we've already sent to the
+  // server via the ingress WebSocket. Serves two purposes:
+  //  1. Echo filtering — ignore our own messages bouncing back on the WS.
+  //  2. Secondary dedup in writeMessages — catch race conditions where
+  //     the hook's index-based tracking isn't sufficient.
+  //
+  // Seeded with initialMessageUUIDs so that when the server echoes back
+  // the initial conversation context over the ingress WebSocket, those
+  // messages are recognized as echoes and not re-injected into the REPL.
+  //
+  // Capacity of 2000 covers well over any realistic echo window (echoes
+  // arrive within milliseconds) and any messages that might be re-encountered
+  // after compaction. The hook's lastWrittenIndexRef is the primary dedup;
+  // this is a safety net.
+  const recentPostedUUIDs = new BoundedUUIDSet(2000)
+  for (const uuid of initialMessageUUIDs) {
+    recentPostedUUIDs.add(uuid)
+  }
+
+  // Bounded set of INBOUND prompt UUIDs we've already forwarded to the REPL.
+  // Defensive dedup for when the server re-delivers prompts (seq-num
+  // negotiation failure, server edge cases, transport swap races). The
+  // seq-num carryover below is the primary fix; this is the safety net.
+  const recentInboundUUIDs = new BoundedUUIDSet(2000)
+
+  // 7. Start poll loop for work items — this is what makes the session
+  // "live" on claude.ai. When a user types there, the backend dispatches
+  // a work item to our environment. We poll for it, get the ingress token,
+  // and connect the ingress WebSocket.
+  //
+  // The poll loop keeps running: when work arrives it connects the ingress
+  // WebSocket, and if the WebSocket drops unexpectedly (code != 1000) it
+  // resumes polling to get a fresh ingress token and reconnect.
+  const pollController = new AbortController()
+  // Adapter over either HybridTransport (v1: WS reads + POST writes to
+  // Session-Ingress) or SSETransport+CCRClient (v2: SSE reads + POST
+  // writes to CCR /worker/*). The v1/v2 choice is made in onWorkReceived:
+  // server-driven via secret.use_code_sessions, with CLAUDE_BRIDGE_USE_CCR_V2
+  // as an ant-dev override.
+  let transport: ReplBridgeTransport | null = null
+  // Bumped on every onWorkReceived. Captured in createV2ReplTransport's .then()
+  // closure to detect stale resolutions: if two calls race while transport is
+  // null, both registerWorker() (bumping server epoch), and whichever resolves
+  // SECOND is the correct one — but the transport !== null check gets this
+  // backwards (first-to-resolve installs, second discards). The generation
+  // counter catches it independent of transport state.
+  let v2Generation = 0
+  // SSE sequence-number high-water mark carried across transport swaps.
+  // Without this, each new SSETransport starts at 0, sends no
+  // from_sequence_num / Last-Event-ID on its first connect, and the server
+  // replays the entire session event history — every prompt ever sent
+  // re-delivered as fresh inbound messages on every onWorkReceived.
+  //
+  // Seed only when we actually reconnected the prior session. If
+  // `reusedPriorSession` is false we fell through to `createSession()` —
+  // the caller's persisted seq-num belongs to a dead session and applying
+  // it to the fresh stream (starting at 1) silently drops events. Same
+  // hazard as doReconnect Strategy 2; same fix as the reset there.
+  let lastTransportSequenceNum = reusedPriorSession ? initialSSESequenceNum : 0
+  // Track the current work ID so teardown can call stopWork
+  let currentWorkId: string | null = null
+  // Session ingress JWT for the current work item — used for heartbeat auth.
+  // Always cleared together with currentWorkId when the work item is
+  // released (doReconnect, onHeartbeatFatal).
+  let currentIngressToken: string | null = null
+  // Signal to wake the at-capacity sleep early when the transport is lost,
+  // so the poll loop immediately switches back to fast polling for new work.
+  const capacityWake = createCapacityWake(pollController.signal)
+  const wakePollLoop = capacityWake.wake
+  const capacitySignal = capacityWake.signal
+  // Gates message writes during the initial flush to prevent ordering
+  // races where new messages arrive at the server interleaved with history.
+  const flushGate = new FlushGate<Message>()
+
+  // Latch for onUserMessage — flips true when the callback returns true
+  // (policy says "done deriving"). If no callback, skip scanning entirely
+  // (daemon path — no title derivation needed).
+  let userMessageCallbackDone = !onUserMessage
+
+  // Shared counter for environment re-creations, used by both
+  // onEnvironmentLost and the abnormal-close handler.
+  const MAX_ENVIRONMENT_RECREATIONS = 3
+  let environmentRecreations = 0
+  // In-flight doReconnect() shared by all concurrent reconnect requests —
+  // the reentrancy guard in reconnectEnvironmentWithSession() installs and
+  // clears it.
+  let reconnectPromise: Promise<boolean> | null = null
+
+  /**
+   * Recover from onEnvironmentLost (poll returned 404 — env was reaped
+   * server-side). Tries two strategies in order:
+   *
+   *   1. Reconnect-in-place: idempotent re-register with reuseEnvironmentId
+   *      → if the backend returns the same env ID, call reconnectSession()
+   *      to re-queue the existing session. currentSessionId stays the same;
+   *      the URL on the user's phone stays valid; previouslyFlushedUUIDs is
+   *      preserved so history isn't re-sent.
+   *
+   *   2. Fresh session fallback: if the backend returns a different env ID
+   *      (original TTL-expired, e.g. laptop slept >4h) or reconnectSession()
+   *      throws, archive the old session and create a new one on the
+   *      now-registered env. Old behavior before #20460 primitives landed.
+   *
+   * Uses a promise-based reentrancy guard so concurrent callers share the
+   * same reconnection attempt.
+   */
+  async function reconnectEnvironmentWithSession(): Promise<boolean> {
+    if (reconnectPromise) {
+      return reconnectPromise
+    }
+    reconnectPromise = doReconnect()
+    try {
+      return await reconnectPromise
+    } finally {
+      reconnectPromise = null
+    }
+  }
+
+  /**
+   * One reconnection attempt. Do not call directly — go through
+   * reconnectEnvironmentWithSession() so concurrent callers share a single
+   * attempt. Returns true when the bridge is usable again (reconnected in
+   * place, fresh session created, or the poll loop recovered on its own
+   * mid-attempt); false when the retry limit was hit, re-registration or
+   * session creation failed, or teardown aborted the attempt.
+   */
+  async function doReconnect(): Promise<boolean> {
+    environmentRecreations++
+    // Invalidate any in-flight v2 handshake — the environment is being
+    // recreated, so a stale transport arriving post-reconnect would be
+    // pointed at a dead session.
+    v2Generation++
+    logForDebugging(
+      `[bridge:repl] Reconnecting after env lost (attempt ${environmentRecreations}/${MAX_ENVIRONMENT_RECREATIONS})`,
+    )
+
+    if (environmentRecreations > MAX_ENVIRONMENT_RECREATIONS) {
+      logForDebugging(
+        `[bridge:repl] Environment reconnect limit reached (${MAX_ENVIRONMENT_RECREATIONS}), giving up`,
+      )
+      return false
+    }
+
+    // Close the stale transport. Capture seq BEFORE close — if Strategy 1
+    // (tryReconnectInPlace) succeeds we keep the SAME session, and the
+    // next transport must resume where this one left off, not replay from
+    // the last transport-swap checkpoint.
+    if (transport) {
+      const seq = transport.getLastSequenceNum()
+      if (seq > lastTransportSequenceNum) {
+        lastTransportSequenceNum = seq
+      }
+      transport.close()
+      transport = null
+    }
+    // Transport is gone — wake the poll loop out of its at-capacity
+    // heartbeat sleep so it can fast-poll for re-dispatched work.
+    wakePollLoop()
+    // Reset flush gate so writeMessages() hits the !transport guard
+    // instead of silently queuing into a dead buffer.
+    flushGate.drop()
+
+    // Release the current work item (force=false — we may want the session
+    // back). Best-effort: the env is probably gone, so this likely 404s.
+    if (currentWorkId) {
+      const workIdBeingCleared = currentWorkId
+      await api
+        .stopWork(environmentId, workIdBeingCleared, false)
+        .catch(() => {})
+      // When doReconnect runs concurrently with the poll loop (ws_closed
+      // handler case — void-called, unlike the awaited onEnvironmentLost
+      // path), onWorkReceived can fire during the stopWork await and set
+      // a fresh currentWorkId. If it did, the poll loop has already
+      // recovered on its own — defer to it rather than proceeding to
+      // archiveSession, which would destroy the session its new
+      // transport is connected to.
+      if (currentWorkId !== workIdBeingCleared) {
+        logForDebugging(
+          '[bridge:repl] Poll loop recovered during stopWork await — deferring to it',
+        )
+        environmentRecreations = 0
+        return true
+      }
+      currentWorkId = null
+      currentIngressToken = null
+    }
+
+    // Bail out if teardown started while we were awaiting
+    if (pollController.signal.aborted) {
+      logForDebugging('[bridge:repl] Reconnect aborted by teardown')
+      return false
+    }
+
+    // Strategy 1: idempotent re-register with the server-issued env ID.
+    // If the backend resurrects the same env (fresh secret), we can
+    // reconnect the existing session. If it hands back a different ID, the
+    // original env is truly gone and we fall through to a fresh session.
+    const requestedEnvId = environmentId
+    bridgeConfig.reuseEnvironmentId = requestedEnvId
+    try {
+      const reg = await api.registerBridgeEnvironment(bridgeConfig)
+      environmentId = reg.environment_id
+      environmentSecret = reg.environment_secret
+    } catch (err) {
+      bridgeConfig.reuseEnvironmentId = undefined
+      logForDebugging(
+        `[bridge:repl] Environment re-registration failed: ${errorMessage(err)}`,
+      )
+      return false
+    }
+    // Clear before any await — a stale value would poison the next fresh
+    // registration if doReconnect runs again.
+    bridgeConfig.reuseEnvironmentId = undefined
+
+    logForDebugging(
+      `[bridge:repl] Re-registered: requested=${requestedEnvId} got=${environmentId}`,
+    )
+
+    // Bail out if teardown started while we were registering
+    if (pollController.signal.aborted) {
+      logForDebugging(
+        '[bridge:repl] Reconnect aborted after env registration, cleaning up',
+      )
+      await api.deregisterEnvironment(environmentId).catch(() => {})
+      return false
+    }
+
+    // Same race as above, narrower window: poll loop may have set up a
+    // transport during the registerBridgeEnvironment await. Bail before
+    // tryReconnectInPlace/archiveSession kill it server-side.
+    if (transport !== null) {
+      logForDebugging(
+        '[bridge:repl] Poll loop recovered during registerBridgeEnvironment await — deferring to it',
+      )
+      environmentRecreations = 0
+      return true
+    }
+
+    // Strategy 1: same helper as perpetual init. currentSessionId stays
+    // the same on success; URL on mobile/web stays valid;
+    // previouslyFlushedUUIDs preserved (no re-flush).
+    if (await tryReconnectInPlace(requestedEnvId, currentSessionId)) {
+      logEvent('tengu_bridge_repl_reconnected_in_place', {})
+      environmentRecreations = 0
+      return true
+    }
+    // Env differs → TTL-expired/reaped; or reconnect failed.
+    // Don't deregister — we have a fresh secret for this env either way.
+    if (environmentId !== requestedEnvId) {
+      logEvent('tengu_bridge_repl_env_expired_fresh_session', {})
+    }
+
+    // Strategy 2: fresh session on the now-registered environment.
+    // Archive the old session first — it's orphaned (bound to a dead env,
+    // or reconnectSession rejected it). Don't deregister the env — we just
+    // got a fresh secret for it and are about to use it.
+    await archiveSession(currentSessionId)
+
+    // Bail out if teardown started while we were archiving
+    if (pollController.signal.aborted) {
+      logForDebugging(
+        '[bridge:repl] Reconnect aborted after archive, cleaning up',
+      )
+      await api.deregisterEnvironment(environmentId).catch(() => {})
+      return false
+    }
+
+    // Re-read the current title in case the user renamed the session.
+    // REPL wrapper reads session storage; daemon wrapper returns the
+    // original title (nothing to refresh).
+    const currentTitle = getCurrentTitle()
+
+    // Create a new session on the now-registered environment
+    const newSessionId = await createSession({
+      environmentId,
+      title: currentTitle,
+      gitRepoUrl,
+      branch,
+      signal: AbortSignal.timeout(15_000),
+    })
+
+    if (!newSessionId) {
+      logForDebugging(
+        '[bridge:repl] Session creation failed during reconnection',
+      )
+      return false
+    }
+
+    // Bail out if teardown started during session creation (up to 15s)
+    if (pollController.signal.aborted) {
+      logForDebugging(
+        '[bridge:repl] Reconnect aborted after session creation, cleaning up',
+      )
+      await archiveSession(newSessionId)
+      return false
+    }
+
+    currentSessionId = newSessionId
+    // Re-publish to the PID file so peer dedup (peerRegistry.ts) picks up the
+    // new ID — setReplBridgeHandle only fires at init/teardown, not reconnect.
+    void updateSessionBridgeId(toCompatSessionId(newSessionId)).catch(() => {})
+    // Reset per-session transport state IMMEDIATELY after the session swap,
+    // before any await. If this runs after `await writeBridgePointer` below,
+    // there's a window where handle.bridgeSessionId already returns session B
+    // but getSSESequenceNum() still returns session A's seq — a daemon
+    // persistState() in that window writes {bridgeSessionId: B, seq: OLD_A},
+    // which PASSES the session-ID validation check and defeats it entirely.
+    //
+    // The SSE seq-num is scoped to the session's event stream — carrying it
+    // over leaves the transport's lastSequenceNum stuck high (seq only
+    // advances when received > last), and its next internal reconnect would
+    // send from_sequence_num=OLD_SEQ against a stream starting at 1 → all
+    // events in the gap silently dropped. Inbound UUID dedup is also
+    // session-scoped.
+    lastTransportSequenceNum = 0
+    recentInboundUUIDs.clear()
+    // Title derivation is session-scoped too: if the user typed during the
+    // createSession await above, the callback fired against the OLD archived
+    // session ID (PATCH lost) and the new session got `currentTitle` captured
+    // BEFORE they typed. Reset so the next prompt can re-derive. Self-
+    // correcting: if the caller's policy is already done (explicit title or
+    // count ≥ 3), it returns true on the first post-reset call and re-latches.
+    userMessageCallbackDone = !onUserMessage
+    logForDebugging(`[bridge:repl] Re-created session: ${currentSessionId}`)
+
+    // Rewrite the crash-recovery pointer with the new IDs so a crash after
+    // this point resumes the right session. (The reconnect-in-place path
+    // above doesn't touch the pointer — same session, same env.)
+    await writeBridgePointer(dir, {
+      sessionId: currentSessionId,
+      environmentId,
+      source: 'repl',
+    })
+
+    // Clear flushed UUIDs so initial messages are re-sent to the new session.
+    // UUIDs are scoped per-session on the server, so re-flushing is safe.
+    previouslyFlushedUUIDs?.clear()
+
+
+    // Reset the counter so independent reconnections hours apart don't
+    // exhaust the limit — it guards against rapid consecutive failures,
+    // not lifetime total.
+    environmentRecreations = 0
+
+    return true
+  }
+
+  // Helper: get the current OAuth access token for session ingress auth.
+  // Unlike the JWT path, OAuth tokens are refreshed by the standard OAuth
+  // flow — no proactive scheduler needed. Returns undefined when no token
+  // is available; the v1 work path skips the work item in that case.
+  function getOAuthToken(): string | undefined {
+    return getAccessToken()
+  }
+
+  // Drain messages queued while the initial flush was in flight. Runs
+  // after writeBatch settles (success or failure) so the queued messages
+  // are delivered in order, after the historical ones.
+  function drainFlushGate(): void {
+    const pending = flushGate.end()
+    if (pending.length === 0) {
+      return
+    }
+    if (!transport) {
+      logForDebugging(
+        `[bridge:repl] Cannot drain ${pending.length} pending message(s): no transport`,
+      )
+      return
+    }
+    // Record outbound UUIDs first so echoes of these messages are deduped.
+    for (const { uuid } of pending) {
+      recentPostedUUIDs.add(uuid)
+    }
+    // Tag every SDK message with the current session before the batch write.
+    const events = toSDKMessages(pending).map(sdkMsg => ({
+      ...sdkMsg,
+      session_id: currentSessionId,
+    }))
+    logForDebugging(
+      `[bridge:repl] Drained ${pending.length} pending message(s) after flush`,
+    )
+    void transport.writeBatch(events)
+  }
+
+  // Late-bound teardown reference — assigned after its definition below.
+  // Every caller is an async callback that fires only after the
+  // assignment, so the null window is never observed in practice.
+  let doTeardownImpl: (() => Promise<void>) | null = null
+  function triggerTeardown(): void {
+    const teardown = doTeardownImpl
+    if (teardown) {
+      // Fire-and-forget: teardown owns its own error handling.
+      void teardown()
+    }
+  }
+
+  /**
+   * Body of the transport's setOnClose callback, hoisted to initBridgeCore
+   * scope so /bridge-kick can fire it directly. setOnClose wraps this with
+   * a stale-transport guard; debugFireClose calls it bare.
+   *
+   * With autoReconnect:true, this only fires on: clean close (1000),
+   * permanent server rejection (4001/1002/4003), or 10-min budget
+   * exhaustion. Transient drops are retried internally by the transport.
+   *
+   * @param closeCode Close code reported by the transport (1000 = clean
+   *   close ends the session); undefined when no code was provided.
+   */
+  function handleTransportPermanentClose(closeCode: number | undefined): void {
+    logForDebugging(
+      `[bridge:repl] Transport permanently closed: code=${closeCode}`,
+    )
+    logEvent('tengu_bridge_repl_ws_closed', {
+      code: closeCode,
+    })
+    // Capture SSE seq high-water mark before nulling. When called from
+    // setOnClose the guard guarantees transport !== null; when fired from
+    // /bridge-kick it may already be null (e.g. fired twice) — skip.
+    if (transport) {
+      const closedSeq = transport.getLastSequenceNum()
+      if (closedSeq > lastTransportSequenceNum) {
+        lastTransportSequenceNum = closedSeq
+      }
+      transport = null
+    }
+    // Transport is gone — wake the poll loop out of its at-capacity
+    // heartbeat sleep so it's fast-polling by the time the reconnect
+    // below completes and the server re-queues work.
+    wakePollLoop()
+    // Reset flush state so writeMessages() hits the !transport guard
+    // (with a warning log) instead of silently queuing into a buffer
+    // that will never be drained. Unlike onWorkReceived (which
+    // preserves pending messages for the new transport), onClose is
+    // a permanent close — no new transport will drain these.
+    const dropped = flushGate.drop()
+    if (dropped > 0) {
+      logForDebugging(
+        `[bridge:repl] Dropping ${dropped} pending message(s) on transport close (code=${closeCode})`,
+        { level: 'warn' },
+      )
+    }
+
+    if (closeCode === 1000) {
+      // Clean close — session ended normally. Tear down the bridge.
+      onStateChange?.('failed', 'session ended')
+      pollController.abort()
+      triggerTeardown()
+      return
+    }
+
+    // Transport reconnect budget exhausted or permanent server
+    // rejection. By this point the env has usually been reaped
+    // server-side (BQ 2026-03-12: ~98% of ws_closed never recover
+    // via poll alone). stopWork(force=false) can't re-dispatch work
+    // from an archived env; reconnectEnvironmentWithSession can
+    // re-activate it via POST /bridge/reconnect, or fall through
+    // to a fresh session if the env is truly gone. The poll loop
+    // (already woken above) picks up the re-queued work once
+    // doReconnect completes.
+    onStateChange?.(
+      'reconnecting',
+      `Remote Control connection lost (code ${closeCode})`,
+    )
+    logForDebugging(
+      `[bridge:repl] Transport reconnect budget exhausted (code=${closeCode}), attempting env reconnect`,
+    )
+    void reconnectEnvironmentWithSession().then(success => {
+      if (success) return
+      // doReconnect has four abort-check return-false sites for
+      // teardown-in-progress. Don't pollute the BQ failure signal
+      // or double-teardown when the user just quit.
+      if (pollController.signal.aborted) return
+      // doReconnect returns false (never throws) on genuine failure.
+      // The dangerous case: registerBridgeEnvironment succeeded (so
+      // environmentId now points at a fresh valid env) but
+      // createSession failed — poll loop would poll a sessionless
+      // env getting null work with no errors, never hitting any
+      // give-up path. Tear down explicitly.
+      logForDebugging(
+        '[bridge:repl] reconnectEnvironmentWithSession resolved false — tearing down',
+      )
+      logEvent('tengu_bridge_repl_reconnect_failed', {
+        close_code: closeCode,
+      })
+      onStateChange?.('failed', 'reconnection failed')
+      triggerTeardown()
+    })
+  }
+
+  // Ant-only: SIGUSR2 → force doReconnect() for manual testing. Skips the
+  // ~30s poll wait — fire-and-observe in the debug log immediately.
+  // Windows has no USR signals; `process.on` would throw there.
+  // NOTE(review): the handler reference is kept, presumably so teardown
+  // can process.off() it — confirm against the teardown path.
+  let sigusr2Handler: (() => void) | undefined
+  if (process.env.USER_TYPE === 'ant' && process.platform !== 'win32') {
+    sigusr2Handler = () => {
+      logForDebugging(
+        '[bridge:repl] SIGUSR2 received — forcing doReconnect() for testing',
+      )
+      void reconnectEnvironmentWithSession()
+    }
+    process.on('SIGUSR2', sigusr2Handler)
+  }
+
+  // Ant-only: /bridge-kick fault injection. handleTransportPermanentClose
+  // is defined below and assigned into this slot so the slash command can
+  // invoke it directly — the real setOnClose callback is buried inside
+  // wireTransport which is itself inside onWorkReceived.
+  let debugFireClose: ((code: number) => void) | null = null
+  if (process.env.USER_TYPE === 'ant') {
+    registerBridgeDebugHandle({
+      // Inject a permanent-close with an arbitrary code. No-op (with a
+      // log) until wireTransport has assigned debugFireClose.
+      fireClose: code => {
+        if (!debugFireClose) {
+          logForDebugging('[bridge:debug] fireClose: no transport wired yet')
+          return
+        }
+        logForDebugging(`[bridge:debug] fireClose(${code}) — injecting`)
+        debugFireClose(code)
+      },
+      // Kick off the shared reconnect path directly (fire-and-forget).
+      forceReconnect: () => {
+        logForDebugging('[bridge:debug] forceReconnect — injecting')
+        void reconnectEnvironmentWithSession()
+      },
+      injectFault: injectBridgeFault,
+      wakePollLoop,
+      // One-line snapshot of bridge state for debug output.
+      describe: () =>
+        `env=${environmentId} session=${currentSessionId} transport=${transport?.getStateLabel() ?? 'null'} workId=${currentWorkId ?? 'null'}`,
+    })
+  }
+
+  const pollOpts = {
+    api,
+    getCredentials: () => ({ environmentId, environmentSecret }),
+    signal: pollController.signal,
+    getPollIntervalConfig,
+    onStateChange,
+    getWsState: () => transport?.getStateLabel() ?? 'null',
+    // REPL bridge is single-session: having any transport == at capacity.
+    // No need to check isConnectedStatus() — even while the transport is
+    // auto-reconnecting internally (up to 10 min), poll is heartbeat-only.
+    isAtCapacity: () => transport !== null,
+    capacitySignal,
+    onFatalError: triggerTeardown,
+    // Heartbeat auth for the current work item; null (no heartbeat)
+    // whenever there is no active work ID or ingress JWT.
+    getHeartbeatInfo: () => {
+      if (!currentWorkId || !currentIngressToken) {
+        return null
+      }
+      return {
+        environmentId,
+        workId: currentWorkId,
+        sessionToken: currentIngressToken,
+      }
+    },
+    // Work-item JWT expired (or work gone). The transport is useless —
+    // SSE reconnects and CCR writes use the same stale token. Without
+    // this callback the poll loop would do a 10-min at-capacity backoff,
+    // during which the work lease (300s TTL) expires and the server stops
+    // forwarding prompts → ~25-min dead window observed in daemon logs.
+    // Kill the transport + work state so isAtCapacity()=false; the loop
+    // fast-polls and picks up the server's re-dispatched work in seconds.
+    onHeartbeatFatal: (err: BridgeFatalError) => {
+      logForDebugging(
+        `[bridge:repl] heartbeatWork fatal (status=${err.status}) — tearing down work item for fast re-dispatch`,
+      )
+      if (transport) {
+        const seq = transport.getLastSequenceNum()
+        if (seq > lastTransportSequenceNum) {
+          lastTransportSequenceNum = seq
+        }
+        transport.close()
+        transport = null
+      }
+      flushGate.drop()
+      // force=false → server re-queues. Likely already expired, but
+      // idempotent and makes re-dispatch immediate if not.
+      if (currentWorkId) {
+        void api
+          .stopWork(environmentId, currentWorkId, false)
+          .catch((e: unknown) => {
+            logForDebugging(
+              `[bridge:repl] stopWork after heartbeat fatal: ${errorMessage(e)}`,
+            )
+          })
+      }
+      currentWorkId = null
+      currentIngressToken = null
+      wakePollLoop()
+      onStateChange?.(
+        'reconnecting',
+        'Work item lease expired, fetching fresh token',
+      )
+    },
+    // Poll hit a lost environment. Run the shared reconnect path; on
+    // success hand the (possibly refreshed) credentials back to the poll
+    // loop, on failure return null so the loop stops.
+    async onEnvironmentLost() {
+      const success = await reconnectEnvironmentWithSession()
+      if (!success) {
+        return null
+      }
+      return { environmentId, environmentSecret }
+    },
+    onWorkReceived: (
+      workSessionId: string,
+      ingressToken: string,
+      workId: string,
+      serverUseCcrV2: boolean,
+    ) => {
+      // When new work arrives while a transport is already open, the
+      // server has decided to re-dispatch (e.g. token rotation, server
+      // restart). Close the existing transport and reconnect — discarding
+      // the work causes a stuck 'reconnecting' state if the old WS dies
+      // shortly after (the server won't re-dispatch a work item it
+      // already delivered).
+      // ingressToken (JWT) is stored for heartbeat auth (both v1 and v2).
+      // Transport auth diverges — see the v1/v2 split below.
+      if (transport?.isConnectedStatus()) {
+        logForDebugging(
+          `[bridge:repl] Work received while transport connected, replacing with fresh token (workId=${workId})`,
+        )
+      }
+
+      logForDebugging(
+        `[bridge:repl] Work received: workId=${workId} workSessionId=${workSessionId} currentSessionId=${currentSessionId} match=${sameSessionId(workSessionId, currentSessionId)}`,
+      )
+
+      // Refresh the crash-recovery pointer's mtime. Staleness checks file
+      // mtime (not embedded timestamp) so this re-write bumps the clock —
+      // a 5h+ session that crashes still has a fresh pointer. Fires once
+      // per work dispatch (infrequent — bounded by user message rate).
+      void writeBridgePointer(dir, {
+        sessionId: currentSessionId,
+        environmentId,
+        source: 'repl',
+      })
+
+      // Reject foreign session IDs — the server shouldn't assign sessions
+      // from other environments. Since we create env+session as a pair,
+      // a mismatch indicates an unexpected server-side reassignment.
+      //
+      // Compare by underlying UUID, not by tagged-ID prefix. When CCR
+      // v2's compat layer serves the session, createBridgeSession gets
+      // session_* from the v1-facing API (compat/convert.go:41) but the
+      // infrastructure layer delivers cse_* in the work queue
+      // (container_manager.go:129). Same UUID, different tag.
+      if (!sameSessionId(workSessionId, currentSessionId)) {
+        logForDebugging(
+          `[bridge:repl] Rejecting foreign session: expected=${currentSessionId} got=${workSessionId}`,
+        )
+        return
+      }
+
+      currentWorkId = workId
+      currentIngressToken = ingressToken
+
+      // Server decides per-session (secret.use_code_sessions from the work
+      // secret, threaded through runWorkPollLoop). The env var is an ant-dev
+      // override for forcing v2 before the server flag is on for your user —
+      // requires ccr_v2_compat_enabled server-side or registerWorker 404s.
+      //
+      // Kept separate from CLAUDE_CODE_USE_CCR_V2 (the child-SDK transport
+      // selector set by sessionRunner/environment-manager) to avoid the
+      // inheritance hazard in spawn mode where the parent's orchestrator
+      // var would leak into a v1 child.
+      const useCcrV2 =
+        serverUseCcrV2 || isEnvTruthy(process.env.CLAUDE_BRIDGE_USE_CCR_V2)
+
+      // Auth is the one place v1 and v2 diverge hard:
+      //
+      // - v1 (Session-Ingress): accepts OAuth OR JWT. We prefer OAuth
+      //   because the standard OAuth refresh flow handles expiry — no
+      //   separate JWT refresh scheduler needed.
+      //
+      // - v2 (CCR /worker/*): REQUIRES the JWT. register_worker.go:32
+      //   validates the session_id claim, which OAuth tokens don't carry.
+      //   The JWT from the work secret has both that claim and the worker
+      //   role (environment_auth.py:856). JWT refresh: when it expires the
+      //   server re-dispatches work with a fresh one, and onWorkReceived
+      //   fires again. createV2ReplTransport stores it via
+      //   updateSessionIngressAuthToken() before touching the network.
+      let v1OauthToken: string | undefined
+      if (!useCcrV2) {
+        v1OauthToken = getOAuthToken()
+        if (!v1OauthToken) {
+          logForDebugging(
+            '[bridge:repl] No OAuth token available for session ingress, skipping work',
+          )
+          return
+        }
+        updateSessionIngressAuthToken(v1OauthToken)
+      }
+      logEvent('tengu_bridge_repl_work_received', {})
+
+      // Close the previous transport. Nullify BEFORE calling close() so
+      // the close callback doesn't treat the programmatic close as
+      // "session ended normally" and trigger a full teardown.
+      if (transport) {
+        const oldTransport = transport
+        transport = null
+        // Capture the SSE sequence high-water mark so the next transport
+        // resumes the stream instead of replaying from seq 0. Use max() —
+        // a transport that died early (never received any frames) would
+        // otherwise reset a non-zero mark back to 0.
+        const oldSeq = oldTransport.getLastSequenceNum()
+        if (oldSeq > lastTransportSequenceNum) {
+          lastTransportSequenceNum = oldSeq
+        }
+        oldTransport.close()
+      }
+      // Reset flush state — the old flush (if any) is no longer relevant.
+      // Preserve pending messages so they're drained after the new
+      // transport's flush completes (the hook has already advanced its
+      // lastWrittenIndex and won't re-send them).
+      flushGate.deactivate()
+
+      // Closure adapter over the shared handleServerControlRequest —
+      // captures transport/currentSessionId so the transport.setOnData
+      // callback below doesn't need to thread them through.
+      const onServerControlRequest = (request: SDKControlRequest): void =>
+        handleServerControlRequest(request, {
+          transport,
+          sessionId: currentSessionId,
+          onInterrupt,
+          onSetModel,
+          onSetMaxThinkingTokens,
+          onSetPermissionMode,
+        })
+
+      let initialFlushDone = false
+
+      // Wire callbacks onto a freshly constructed transport and connect.
+      // Extracted so the (sync) v1 and (async) v2 construction paths can
+      // share the identical callback + flush machinery.
+      const wireTransport = (newTransport: ReplBridgeTransport): void => {
+        transport = newTransport
+
+        newTransport.setOnConnect(() => {
+          // Guard: if transport was replaced by a newer onWorkReceived call
+          // while the WS was connecting, ignore this stale callback.
+          if (transport !== newTransport) return
+
+          logForDebugging('[bridge:repl] Ingress transport connected')
+          logEvent('tengu_bridge_repl_ws_connected', {})
+
+          // Update the env var with the latest OAuth token so POST writes
+          // (which read via getSessionIngressAuthToken()) use a fresh token.
+          // v2 skips this — createV2ReplTransport already stored the JWT,
+          // and overwriting it with OAuth would break subsequent /worker/*
+          // requests (session_id claim check).
+          if (!useCcrV2) {
+            const freshToken = getOAuthToken()
+            if (freshToken) {
+              updateSessionIngressAuthToken(freshToken)
+            }
+          }
+
+          // Reset teardownStarted so future teardowns are not blocked.
+          teardownStarted = false
+
+          // Flush initial messages only on first connect, not on every
+          // WS reconnection. Re-flushing would cause duplicate messages.
+          // IMPORTANT: onStateChange('connected') is deferred until the
+          // flush completes. This prevents writeMessages() from sending
+          // new messages that could arrive at the server interleaved with
+          // the historical messages, and delays the web UI from showing
+          // the session as active until history is persisted.
+          if (
+            !initialFlushDone &&
+            initialMessages &&
+            initialMessages.length > 0
+          ) {
+            initialFlushDone = true
+
+            // Cap the initial flush to the most recent N messages. The full
+            // history is UI-only (model doesn't see it) and large replays cause
+            // slow session-ingress persistence (each event is a threadstore write)
+            // plus elevated Firestore pressure. A 0 or negative cap disables it.
+            const historyCap = initialHistoryCap
+            const eligibleMessages = initialMessages.filter(
+              m =>
+                isEligibleBridgeMessage(m) &&
+                !previouslyFlushedUUIDs?.has(m.uuid),
+            )
+            const cappedMessages =
+              historyCap > 0 && eligibleMessages.length > historyCap
+                ? eligibleMessages.slice(-historyCap)
+                : eligibleMessages
+            if (cappedMessages.length < eligibleMessages.length) {
+              logForDebugging(
+                `[bridge:repl] Capped initial flush: ${eligibleMessages.length} -> ${cappedMessages.length} (cap=${historyCap})`,
+              )
+              logEvent('tengu_bridge_repl_history_capped', {
+                eligible_count: eligibleMessages.length,
+                capped_count: cappedMessages.length,
+              })
+            }
+            const sdkMessages = toSDKMessages(cappedMessages)
+            if (sdkMessages.length > 0) {
+              logForDebugging(
+                `[bridge:repl] Flushing ${sdkMessages.length} initial message(s) via transport`,
+              )
+              const events = sdkMessages.map(sdkMsg => ({
+                ...sdkMsg,
+                session_id: currentSessionId,
+              }))
+              const dropsBefore = newTransport.droppedBatchCount
+              void newTransport
+                .writeBatch(events)
+                .then(() => {
+                  // If any batch was dropped during this flush (SI down for
+                  // maxConsecutiveFailures attempts), flush() still resolved
+                  // normally but the events were NOT delivered. Don't mark
+                  // UUIDs as flushed — keep them eligible for re-send on the
+                  // next onWorkReceived (JWT refresh re-dispatch, line ~1144).
+                  if (newTransport.droppedBatchCount > dropsBefore) {
+                    logForDebugging(
+                      `[bridge:repl] Initial flush dropped ${newTransport.droppedBatchCount - dropsBefore} batch(es) — not marking ${sdkMessages.length} UUID(s) as flushed`,
+                    )
+                    return
+                  }
+                  if (previouslyFlushedUUIDs) {
+                    for (const sdkMsg of sdkMessages) {
+                      if (sdkMsg.uuid) {
+                        previouslyFlushedUUIDs.add(sdkMsg.uuid)
+                      }
+                    }
+                  }
+                })
+                .catch(e =>
+                  logForDebugging(`[bridge:repl] Initial flush failed: ${e}`),
+                )
+                .finally(() => {
+                  // Guard: if transport was replaced during the flush,
+                  // don't signal connected or drain — the new transport
+                  // owns the lifecycle now.
+                  if (transport !== newTransport) return
+                  drainFlushGate()
+                  onStateChange?.('connected')
+                })
+            } else {
+              // All initial messages were already flushed (filtered by
+              // previouslyFlushedUUIDs). No flush POST needed — clear
+              // the flag and signal connected immediately. This is the
+              // first connect for this transport (inside !initialFlushDone),
+              // so no flush POST is in-flight — the flag was set before
+              // connect() and must be cleared here.
+              drainFlushGate()
+              onStateChange?.('connected')
+            }
+          } else if (!flushGate.active) {
+            // No initial messages or already flushed on first connect.
+            // WS auto-reconnect path — only signal connected if no flush
+            // POST is in-flight. If one is, .finally() owns the lifecycle.
+            onStateChange?.('connected')
+          }
+        })
+
+        // Fan every inbound frame out through the shared ingress dispatcher.
+        // The two UUID ring buffers let it drop echoes of our own posted
+        // writes and dedup recently-seen inbound messages before the
+        // inbound/permission/control callbacks fire.
+        newTransport.setOnData(data =>
+          handleIngressMessage(
+            data,
+            recentPostedUUIDs,
+            recentInboundUUIDs,
+            onInboundMessage,
+            onPermissionResponse,
+            onServerControlRequest,
+          ),
+        )
+
+        // Body lives at initBridgeCore scope so /bridge-kick can call it
+        // directly via debugFireClose. All referenced closures (transport,
+        // wakePollLoop, flushGate, reconnectEnvironmentWithSession, etc.)
+        // are already at that scope. The only lexical dependency on
+        // wireTransport was `newTransport.getLastSequenceNum()` — but after
+        // the guard below passes we know transport === newTransport.
+        debugFireClose = handleTransportPermanentClose
+        newTransport.setOnClose(closeCode => {
+          // Guard: if transport was replaced, ignore stale close.
+          if (transport !== newTransport) return
+          handleTransportPermanentClose(closeCode)
+        })
+
+        // Start the flush gate before connect() to cover the WS handshake
+        // window. Between transport assignment and setOnConnect firing,
+        // writeMessages() could send messages via HTTP POST before the
+        // initial flush starts. Starting the gate here ensures those
+        // calls are queued. If there are no initial messages, the gate
+        // stays inactive.
+        if (
+          !initialFlushDone &&
+          initialMessages &&
+          initialMessages.length > 0
+        ) {
+          flushGate.start()
+        }
+
+        // Kick off the handshake last — the data/close callbacks and the
+        // flush gate are all wired above, so nothing can slip past them.
+        newTransport.connect()
+      } // end wireTransport
+
+      // Bump unconditionally — ANY new transport (v1 or v2) invalidates an
+      // in-flight v2 handshake. Also bumped in doReconnect().
+      v2Generation++
+
+      if (useCcrV2) {
+        // workSessionId is the cse_* form (infrastructure-layer ID from the
+        // work queue), which is what /v1/code/sessions/{id}/worker/* wants.
+        // The session_* form (currentSessionId) is NOT usable here —
+        // handler/convert.go:30 validates TagCodeSession.
+        const sessionUrl = buildCCRv2SdkUrl(baseUrl, workSessionId)
+        const thisGen = v2Generation
+        logForDebugging(
+          `[bridge:repl] CCR v2: sessionUrl=${sessionUrl} session=${workSessionId} gen=${thisGen}`,
+        )
+        // Fire-and-forget by design: both the resolve and reject paths
+        // below are guarded (abort signal + generation counter) against
+        // completing after teardown or after a newer attempt started.
+        void createV2ReplTransport({
+          sessionUrl,
+          ingressToken,
+          sessionId: workSessionId,
+          initialSequenceNum: lastTransportSequenceNum,
+        }).then(
+          t => {
+            // Teardown started while registerWorker was in flight. Teardown
+            // saw transport === null and skipped close(); installing now
+            // would leak CCRClient heartbeat timers and reset
+            // teardownStarted via wireTransport's side effects.
+            if (pollController.signal.aborted) {
+              t.close()
+              return
+            }
+            // onWorkReceived may have fired again while registerWorker()
+            // was in flight (server re-dispatch with a fresh JWT). The
+            // transport !== null check alone gets the race wrong when BOTH
+            // attempts saw transport === null — it keeps the first resolver
+            // (stale epoch) and discards the second (correct epoch). The
+            // generation check catches it regardless of transport state.
+            if (thisGen !== v2Generation) {
+              logForDebugging(
+                `[bridge:repl] CCR v2: discarding stale handshake gen=${thisGen} current=${v2Generation}`,
+              )
+              t.close()
+              return
+            }
+            wireTransport(t)
+          },
+          (err: unknown) => {
+            logForDebugging(
+              `[bridge:repl] CCR v2: createV2ReplTransport failed: ${errorMessage(err)}`,
+              { level: 'error' },
+            )
+            logEvent('tengu_bridge_repl_ccr_v2_init_failed', {})
+            // If a newer attempt is in flight or already succeeded, don't
+            // touch its work item — our failure is irrelevant.
+            if (thisGen !== v2Generation) return
+            // Release the work item so the server re-dispatches immediately
+            // instead of waiting for its own timeout. currentWorkId was set
+            // above; without this, the session looks stuck to the user.
+            if (currentWorkId) {
+              void api
+                .stopWork(environmentId, currentWorkId, false)
+                .catch((e: unknown) => {
+                  logForDebugging(
+                    `[bridge:repl] stopWork after v2 init failure: ${errorMessage(e)}`,
+                  )
+                })
+              currentWorkId = null
+              currentIngressToken = null
+            }
+            wakePollLoop()
+          },
+        )
+      } else {
+        // v1: HybridTransport (WS reads + POST writes to Session-Ingress).
+        // autoReconnect is true (default) — when the WS dies, the transport
+        // reconnects automatically with exponential backoff. POST writes
+        // continue during reconnection (they use getSessionIngressAuthToken()
+        // independently of WS state). The poll loop remains as a secondary
+        // fallback if the reconnect budget is exhausted (10 min).
+        //
+        // Auth: uses OAuth tokens directly instead of the JWT from the work
+        // secret. refreshHeaders picks up the latest OAuth token on each
+        // WS reconnect attempt.
+        const wsUrl = buildSdkUrl(sessionIngressUrl, workSessionId)
+        logForDebugging(`[bridge:repl] Ingress URL: ${wsUrl}`)
+        logForDebugging(
+          `[bridge:repl] Creating HybridTransport: session=${workSessionId}`,
+        )
+        // v1OauthToken was validated non-null above (we'd have returned early).
+        const oauthToken = v1OauthToken ?? ''
+        wireTransport(
+          createV1ReplTransport(
+            new HybridTransport(
+              new URL(wsUrl),
+              {
+                Authorization: `Bearer ${oauthToken}`,
+                'anthropic-version': '2023-06-01',
+              },
+              workSessionId,
+              () => ({
+                Authorization: `Bearer ${getOAuthToken() ?? oauthToken}`,
+                'anthropic-version': '2023-06-01',
+              }),
+              // Cap retries so a persistently-failing session-ingress can't
+              // pin the uploader drain loop for the lifetime of the bridge.
+              // 50 attempts ≈ 20 min (15s POST timeout + 8s backoff + jitter
+              // per cycle at steady state). Bridge-only — 1P keeps indefinite.
+              {
+                maxConsecutiveFailures: 50,
+                isBridge: true,
+                onBatchDropped: () => {
+                  onStateChange?.(
+                    'reconnecting',
+                    'Lost sync with Remote Control — events could not be delivered',
+                  )
+                  // SI has been down ~20 min. Wake the poll loop so that when
+                  // SI recovers, next poll → onWorkReceived → fresh transport
+                  // → initial flush succeeds → onStateChange('connected') at
+                  // ~line 1420. Without this, state stays 'reconnecting' even
+                  // after SI recovers — daemon.ts:437 denies all permissions,
+                  // useReplBridge.ts:311 keeps replBridgeSessionActive=false.
+                  // If the env was archived during the outage, poll 404 →
+                  // onEnvironmentLost recovery path handles it.
+                  wakePollLoop()
+                },
+              },
+            ),
+          ),
+        )
+      }
+    },
+  }
+  // Fire-and-forget: the loop runs until pollController.abort() fires in
+  // teardown (its abort is the loop's only exit besides fatal errors).
+  void startWorkPollLoop(pollOpts)
+
+  // Perpetual mode: hourly mtime refresh of the crash-recovery pointer.
+  // The onWorkReceived refresh only fires per user prompt — a
+  // daemon idle for >4h would have a stale pointer, and the next restart
+  // would clear it (readBridgePointer TTL check) → fresh session. The
+  // standalone bridge (bridgeMain.ts) has an identical hourly timer.
+  const pointerRefreshTimer = perpetual
+    ? setInterval(() => {
+        // doReconnect() reassigns currentSessionId/environmentId non-
+        // atomically (env at ~:634, session at ~:719, awaits in between).
+        // If this timer fires in that window, its fire-and-forget write can
+        // race with (and overwrite) doReconnect's own pointer write at ~:740,
+        // leaving the pointer at the now-archived old session. doReconnect
+        // writes the pointer itself, so skipping here is free.
+        if (reconnectPromise) return
+        void writeBridgePointer(dir, {
+          sessionId: currentSessionId,
+          environmentId,
+          source: 'repl',
+        })
+      }, 60 * 60_000)
+    : null
+  // unref (optional-chained: null when not perpetual) so an idle timer
+  // can't keep the Node process alive on its own.
+  pointerRefreshTimer?.unref?.()
+
+  // Push a silent keep_alive frame on a fixed interval so upstream proxies
+  // and the session-ingress layer don't GC an otherwise-idle remote control
+  // session. The keep_alive type is filtered before reaching any client UI
+  // (Query.ts drops it; web/iOS/Android never see it in their message loop).
+  // Interval comes from GrowthBook (tengu_bridge_poll_interval_config
+  // session_keepalive_interval_v2_ms, default 120s); 0 = disabled.
+  const keepAliveIntervalMs =
+    getPollIntervalConfig().session_keepalive_interval_v2_ms
+  const keepAliveTimer =
+    keepAliveIntervalMs > 0
+      ? setInterval(() => {
+          if (!transport) return
+          logForDebugging('[bridge:repl] keep_alive sent')
+          void transport.write({ type: 'keep_alive' }).catch((err: unknown) => {
+            logForDebugging(
+              `[bridge:repl] keep_alive write failed: ${errorMessage(err)}`,
+            )
+          })
+        }, keepAliveIntervalMs)
+      : null
+  keepAliveTimer?.unref?.()
+
+  // Shared teardown sequence used by both cleanup registration and
+  // the explicit teardown() method on the returned handle.
+  let teardownStarted = false
+  doTeardownImpl = async (): Promise<void> => {
+    // Idempotency guard — registerCleanup and handle.teardown() can both
+    // reach here; only the first caller runs the sequence.
+    if (teardownStarted) {
+      logForDebugging(
+        `[bridge:repl] Teardown already in progress, skipping duplicate call env=${environmentId} session=${currentSessionId}`,
+      )
+      return
+    }
+    teardownStarted = true
+    const teardownStart = Date.now()
+    logForDebugging(
+      `[bridge:repl] Teardown starting: env=${environmentId} session=${currentSessionId} workId=${currentWorkId ?? 'none'} transportState=${transport?.getStateLabel() ?? 'null'}`,
+    )
+
+    // Stop all periodic work (timers, debug hooks, poll loop) before
+    // touching the transport.
+    if (pointerRefreshTimer !== null) {
+      clearInterval(pointerRefreshTimer)
+    }
+    if (keepAliveTimer !== null) {
+      clearInterval(keepAliveTimer)
+    }
+    if (sigusr2Handler) {
+      process.off('SIGUSR2', sigusr2Handler)
+    }
+    if (process.env.USER_TYPE === 'ant') {
+      clearBridgeDebugHandle()
+      debugFireClose = null
+    }
+    pollController.abort()
+    logForDebugging('[bridge:repl] Teardown: poll loop aborted')
+
+    // Capture the live transport's seq BEFORE close() — close() is sync
+    // (just aborts the SSE fetch) and does NOT invoke onClose, so the
+    // setOnClose capture path never runs for explicit teardown.
+    // Without this, getSSESequenceNum() after teardown returns the stale
+    // lastTransportSequenceNum (captured at the last transport swap), and
+    // daemon callers persisting that value lose all events since then.
+    if (transport) {
+      const finalSeq = transport.getLastSequenceNum()
+      if (finalSeq > lastTransportSequenceNum) {
+        lastTransportSequenceNum = finalSeq
+      }
+    }
+
+    if (perpetual) {
+      // Perpetual teardown is LOCAL-ONLY — do not send result, do not call
+      // stopWork, do not close the transport. All of those signal the
+      // server (and any mobile/attach subscribers) that the session is
+      // ending. Instead: stop polling, let the socket die with the
+      // process; the backend times the work-item lease back to pending on
+      // its own (TTL 300s). Next daemon start reads the pointer and
+      // reconnectSession re-queues work.
+      transport = null
+      flushGate.drop()
+      // Refresh the pointer mtime so that sessions lasting longer than
+      // BRIDGE_POINTER_TTL_MS (4h) don't appear stale on next start.
+      await writeBridgePointer(dir, {
+        sessionId: currentSessionId,
+        environmentId,
+        source: 'repl',
+      })
+      logForDebugging(
+        `[bridge:repl] Teardown (perpetual): leaving env=${environmentId} session=${currentSessionId} alive on server, duration=${Date.now() - teardownStart}ms`,
+      )
+      return
+    }
+
+    // Fire the result message, then archive, THEN close. transport.write()
+    // only enqueues (SerialBatchEventUploader resolves on buffer-add); the
+    // stopWork/archive latency (~200-500ms) is the drain window for the
+    // result POST. Closing BEFORE archive meant relying on HybridTransport's
+    // void-ed 3s grace period, which nothing awaits — forceExit can kill the
+    // socket mid-POST. Same reorder as remoteBridgeCore.ts teardown (#22803).
+    // Null the shared handle first so the handle's write methods (which all
+    // guard on `transport`) stop enqueuing onto a transport about to close.
+    const teardownTransport = transport
+    transport = null
+    flushGate.drop()
+    if (teardownTransport) {
+      void teardownTransport.write(makeResultMessage(currentSessionId))
+    }
+
+    const stopWorkP = currentWorkId
+      ? api
+          .stopWork(environmentId, currentWorkId, true)
+          .then(() => {
+            logForDebugging('[bridge:repl] Teardown: stopWork completed')
+          })
+          .catch((err: unknown) => {
+            logForDebugging(
+              `[bridge:repl] Teardown stopWork failed: ${errorMessage(err)}`,
+            )
+          })
+      : Promise.resolve()
+
+    // Run stopWork and archiveSession in parallel. gracefulShutdown.ts:407
+    // races runCleanupFunctions() against 2s (NOT the 5s outer failsafe),
+    // so archive is capped at 1.5s at the injection site to stay under budget.
+    // archiveSession is contractually no-throw; the injected implementations
+    // log their own success/failure internally.
+    await Promise.all([stopWorkP, archiveSession(currentSessionId)])
+
+    teardownTransport?.close()
+    logForDebugging('[bridge:repl] Teardown: transport closed')
+
+    await api.deregisterEnvironment(environmentId).catch((err: unknown) => {
+      logForDebugging(
+        `[bridge:repl] Teardown deregister failed: ${errorMessage(err)}`,
+      )
+    })
+
+    // Clear the crash-recovery pointer — explicit disconnect or clean REPL
+    // exit means the user is done with this session. Crash/kill-9 never
+    // reaches this line, leaving the pointer for next-launch recovery.
+    await clearBridgePointer(dir)
+
+    logForDebugging(
+      `[bridge:repl] Teardown complete: env=${environmentId} duration=${Date.now() - teardownStart}ms`,
+    )
+  }
+
+  // 8. Register cleanup for graceful shutdown
+  // The handle's explicit teardown() calls unregister() before awaiting
+  // doTeardownImpl, so a user-initiated teardown is not re-run at shutdown
+  // (doTeardownImpl additionally self-guards via teardownStarted).
+  const unregister = registerCleanup(() => doTeardownImpl?.())
+
+  logForDebugging(
+    `[bridge:repl] Ready: env=${environmentId} session=${currentSessionId}`,
+  )
+  onStateChange?.('ready')
+
+  // Public bridge handle. Every method closes over the mutable transport /
+  // session state above; currentSessionId and environmentId can be
+  // reassigned across reconnects, so they are exposed through getters
+  // rather than copied at construction time.
+  return {
+    get bridgeSessionId() {
+      return currentSessionId
+    },
+    get environmentId() {
+      return environmentId
+    },
+    getSSESequenceNum() {
+      // lastTransportSequenceNum only updates when a transport is CLOSED
+      // (captured at swap/onClose). During normal operation the CURRENT
+      // transport's live seq isn't reflected there. Merge both so callers
+      // (e.g. daemon persistState()) get the actual high-water mark.
+      const live = transport?.getLastSequenceNum() ?? 0
+      return Math.max(lastTransportSequenceNum, live)
+    },
+    sessionIngressUrl,
+    writeMessages(messages) {
+      // Filter to user/assistant messages that haven't already been sent.
+      // Two layers of dedup:
+      //  - initialMessageUUIDs: messages sent as session creation events
+      //  - recentPostedUUIDs: messages recently sent via POST
+      const filtered = messages.filter(
+        m =>
+          isEligibleBridgeMessage(m) &&
+          !initialMessageUUIDs.has(m.uuid) &&
+          !recentPostedUUIDs.has(m.uuid),
+      )
+      if (filtered.length === 0) return
+
+      // Fire onUserMessage for title derivation. Scan before the flushGate
+      // check — prompts are title-worthy even if they queue behind the
+      // initial history flush. Keeps calling on every title-worthy message
+      // until the callback returns true; the caller owns the policy.
+      if (!userMessageCallbackDone) {
+        for (const m of filtered) {
+          const text = extractTitleText(m)
+          if (text !== undefined && onUserMessage?.(text, currentSessionId)) {
+            userMessageCallbackDone = true
+            break
+          }
+        }
+      }
+
+      // Queue messages while the initial flush is in progress to prevent
+      // them from arriving at the server interleaved with history.
+      if (flushGate.enqueue(...filtered)) {
+        logForDebugging(
+          `[bridge:repl] Queued ${filtered.length} message(s) during initial flush`,
+        )
+        return
+      }
+
+      if (!transport) {
+        const types = filtered.map(m => m.type).join(',')
+        logForDebugging(
+          `[bridge:repl] Transport not configured, dropping ${filtered.length} message(s) [${types}] for session=${currentSessionId}`,
+          { level: 'warn' },
+        )
+        return
+      }
+
+      // Track in the bounded ring buffer for echo filtering and dedup.
+      for (const msg of filtered) {
+        recentPostedUUIDs.add(msg.uuid)
+      }
+
+      logForDebugging(
+        `[bridge:repl] Sending ${filtered.length} message(s) via transport`,
+      )
+
+      // Convert to SDK format and send via HTTP POST (HybridTransport).
+      // The web UI receives them via the subscribe WebSocket.
+      const sdkMessages = toSDKMessages(filtered)
+      const events = sdkMessages.map(sdkMsg => ({
+        ...sdkMsg,
+        session_id: currentSessionId,
+      }))
+      void transport.writeBatch(events)
+    },
+    writeSdkMessages(messages) {
+      // Daemon path: query() already yields SDKMessage, skip conversion.
+      // Still run echo dedup (server bounces writes back on the WS).
+      // No initialMessageUUIDs filter — daemon has no initial messages.
+      // No flushGate — daemon never starts it (no initial flush).
+      const filtered = messages.filter(
+        m => !m.uuid || !recentPostedUUIDs.has(m.uuid),
+      )
+      if (filtered.length === 0) return
+      if (!transport) {
+        logForDebugging(
+          `[bridge:repl] Transport not configured, dropping ${filtered.length} SDK message(s) for session=${currentSessionId}`,
+          { level: 'warn' },
+        )
+        return
+      }
+      for (const msg of filtered) {
+        if (msg.uuid) recentPostedUUIDs.add(msg.uuid)
+      }
+      const events = filtered.map(m => ({ ...m, session_id: currentSessionId }))
+      void transport.writeBatch(events)
+    },
+    sendControlRequest(request: SDKControlRequest) {
+      if (!transport) {
+        logForDebugging(
+          '[bridge:repl] Transport not configured, skipping control_request',
+        )
+        return
+      }
+      const event = { ...request, session_id: currentSessionId }
+      void transport.write(event)
+      logForDebugging(
+        `[bridge:repl] Sent control_request request_id=${request.request_id}`,
+      )
+    },
+    sendControlResponse(response: SDKControlResponse) {
+      if (!transport) {
+        logForDebugging(
+          '[bridge:repl] Transport not configured, skipping control_response',
+        )
+        return
+      }
+      const event = { ...response, session_id: currentSessionId }
+      void transport.write(event)
+      logForDebugging('[bridge:repl] Sent control_response')
+    },
+    sendControlCancelRequest(requestId: string) {
+      if (!transport) {
+        logForDebugging(
+          '[bridge:repl] Transport not configured, skipping control_cancel_request',
+        )
+        return
+      }
+      const event = {
+        type: 'control_cancel_request' as const,
+        request_id: requestId,
+        session_id: currentSessionId,
+      }
+      void transport.write(event)
+      logForDebugging(
+        `[bridge:repl] Sent control_cancel_request request_id=${requestId}`,
+      )
+    },
+    sendResult() {
+      if (!transport) {
+        logForDebugging(
+          `[bridge:repl] sendResult: skipping, transport not configured session=${currentSessionId}`,
+        )
+        return
+      }
+      void transport.write(makeResultMessage(currentSessionId))
+      logForDebugging(
+        `[bridge:repl] Sent result for session=${currentSessionId}`,
+      )
+    },
+    async teardown() {
+      // Unregister first so graceful shutdown doesn't re-run the teardown
+      // we are about to await (doTeardownImpl also self-guards).
+      unregister()
+      await doTeardownImpl?.()
+      logForDebugging('[bridge:repl] Torn down')
+      logEvent('tengu_bridge_repl_teardown', {})
+    },
+  }
+}
+
+/**
+ * Persistent poll loop for work items. Runs in the background for the
+ * lifetime of the bridge connection.
+ *
+ * When a work item arrives, acknowledges it and calls onWorkReceived
+ * with the session ID and ingress token (which connects the ingress
+ * WebSocket). Then continues polling — the server will dispatch a new
+ * work item if the ingress WebSocket drops, allowing automatic
+ * reconnection without tearing down the bridge.
+ */
+async function startWorkPollLoop({
+  api,
+  getCredentials,
+  signal,
+  onStateChange,
+  onWorkReceived,
+  onEnvironmentLost,
+  getWsState,
+  isAtCapacity,
+  capacitySignal,
+  onFatalError,
+  getPollIntervalConfig = () => DEFAULT_POLL_CONFIG,
+  getHeartbeatInfo,
+  onHeartbeatFatal,
+}: {
+  api: BridgeApiClient
+  getCredentials: () => { environmentId: string; environmentSecret: string }
+  signal: AbortSignal
+  onStateChange?: (state: BridgeState, detail?: string) => void
+  onWorkReceived: (
+    sessionId: string,
+    ingressToken: string,
+    workId: string,
+    useCodeSessions: boolean,
+  ) => void
+  /** Called when the environment has been deleted. Returns new credentials or null. */
+  onEnvironmentLost?: () => Promise<{
+    environmentId: string
+    environmentSecret: string
+  } | null>
+  /** Returns the current WebSocket readyState label for diagnostic logging. */
+  getWsState?: () => string
+  /**
+   * Returns true when the caller cannot accept new work (transport already
+   * connected). When true, the loop polls at the configured at-capacity
+   * interval as a heartbeat only. Server-side BRIDGE_LAST_POLL_TTL is
+   * 4 hours — anything shorter than that is sufficient for liveness.
+   */
+  isAtCapacity?: () => boolean
+  /**
+   * Produces a signal that aborts when capacity frees up (transport lost),
+   * merged with the loop signal. Used to interrupt the at-capacity sleep
+   * so recovery polling starts immediately.
+   */
+  capacitySignal?: () => CapacitySignal
+  /** Called on unrecoverable errors (e.g. server-side expiry) to trigger full teardown. */
+  onFatalError?: () => void
+  /** Poll interval config getter — defaults to DEFAULT_POLL_CONFIG. */
+  getPollIntervalConfig?: () => PollIntervalConfig
+  /**
+   * Returns the current work ID and session ingress token for heartbeat.
+   * When null, heartbeat is not possible (no active work item).
+   */
+  getHeartbeatInfo?: () => {
+    environmentId: string
+    workId: string
+    sessionToken: string
+  } | null
+  /**
+   * Called when heartbeatWork throws BridgeFatalError (401/403/404/410 —
+   * JWT expired or work item gone). Caller should tear down the transport
+   * + work state so isAtCapacity() flips to false and the loop fast-polls
+   * for the server's re-dispatched work item. When provided, the loop
+   * SKIPS the at-capacity backoff sleep (which would otherwise cause a
+   * ~10-minute dead window before recovery). When omitted, falls back to
+   * the backoff sleep to avoid a tight poll+heartbeat loop.
+   */
+  onHeartbeatFatal?: (err: BridgeFatalError) => void
+}): Promise<void> {
+  // Hard cap on environment recreations via onEnvironmentLost —
+  // oscillation protection when a freshly-recreated env immediately dies.
+  const MAX_ENVIRONMENT_RECREATIONS = 3
+
+  logForDebugging(
+    `[bridge:repl] Starting work poll loop for env=${getCredentials().environmentId}`,
+  )
+
+  // Poll-failure streak bookkeeping — all three reset together on the
+  // first successful poll after an error run.
+  let consecutiveErrors = 0
+  let firstErrorTime: number | null = null
+  let lastPollErrorTime: number | null = null
+  // Counts onEnvironmentLost recreations; zeroed on any successful poll so
+  // env-loss events hours apart each get a fresh budget.
+  let environmentRecreations = 0
+  // Set when the at-capacity sleep overruns its deadline by a large margin
+  // (process suspension). Consumed at the top of the next iteration to
+  // force one fast-poll cycle — isAtCapacity() is `transport !== null`,
+  // which stays true while the transport auto-reconnects, so the poll
+  // loop would otherwise go straight back to a 10-minute sleep on a
+  // transport that may be pointed at a dead socket.
+  let suspensionDetected = false
+
+  while (!signal.aborted) {
+    // Capture credentials outside try so the catch block can detect
+    // whether a concurrent reconnection replaced the environment.
+    const { environmentId: envId, environmentSecret: envSecret } =
+      getCredentials()
+    const pollConfig = getPollIntervalConfig()
+    try {
+      const work = await api.pollForWork(
+        envId,
+        envSecret,
+        signal,
+        pollConfig.reclaim_older_than_ms,
+      )
+
+      // A successful poll proves the env is genuinely healthy — reset the
+      // env-loss counter so events hours apart each start fresh. Outside
+      // the state-change guard below because onEnvLost's success path
+      // already emits 'ready'; emitting again here would be a duplicate.
+      // (onEnvLost returning creds does NOT reset this — that would break
+      // oscillation protection when the new env immediately dies.)
+      environmentRecreations = 0
+
+      // Reset error tracking on successful poll
+      if (consecutiveErrors > 0) {
+        logForDebugging(
+          `[bridge:repl] Poll recovered after ${consecutiveErrors} consecutive error(s)`,
+        )
+        consecutiveErrors = 0
+        firstErrorTime = null
+        lastPollErrorTime = null
+        onStateChange?.('ready')
+      }
+
+      if (!work) {
+        // Read-and-clear: after a detected suspension, skip the at-capacity
+        // branch exactly once. The pollForWork above already refreshed the
+        // server's BRIDGE_LAST_POLL_TTL; this fast cycle gives any
+        // re-dispatched work item a chance to land before we go back under.
+        const skipAtCapacityOnce = suspensionDetected
+        suspensionDetected = false
+        if (isAtCapacity?.() && capacitySignal && !skipAtCapacityOnce) {
+          const atCapMs = pollConfig.poll_interval_ms_at_capacity
+          // Heartbeat loops WITHOUT polling. When at-capacity polling is also
+          // enabled (atCapMs > 0), the loop tracks a deadline and breaks out
+          // to poll at that interval — heartbeat and poll compose instead of
+          // one suppressing the other. Breaks out when:
+          //   - Poll deadline reached (atCapMs > 0 only)
+          //   - Auth fails (JWT expired → poll refreshes tokens)
+          //   - Capacity wake fires (transport lost → poll for new work)
+          //   - Heartbeat config disabled (GrowthBook update)
+          //   - Loop aborted (shutdown)
+          if (
+            pollConfig.non_exclusive_heartbeat_interval_ms > 0 &&
+            getHeartbeatInfo
+          ) {
+            logEvent('tengu_bridge_heartbeat_mode_entered', {
+              heartbeat_interval_ms:
+                pollConfig.non_exclusive_heartbeat_interval_ms,
+            })
+            // Deadline computed once at entry — GB updates to atCapMs don't
+            // shift an in-flight deadline (next entry picks up the new value).
+            const pollDeadline = atCapMs > 0 ? Date.now() + atCapMs : null
+            let needsBackoff = false
+            let hbCycles = 0
+            while (
+              !signal.aborted &&
+              isAtCapacity() &&
+              (pollDeadline === null || Date.now() < pollDeadline)
+            ) {
+              const hbConfig = getPollIntervalConfig()
+              if (hbConfig.non_exclusive_heartbeat_interval_ms <= 0) break
+
+              const info = getHeartbeatInfo()
+              if (!info) break
+
+              // Capture capacity signal BEFORE the async heartbeat call so
+              // a transport loss during the HTTP request is caught by the
+              // subsequent sleep.
+              const cap = capacitySignal()
+
+              try {
+                await api.heartbeatWork(
+                  info.environmentId,
+                  info.workId,
+                  info.sessionToken,
+                )
+              } catch (err) {
+                logForDebugging(
+                  `[bridge:repl:heartbeat] Failed: ${errorMessage(err)}`,
+                )
+                if (err instanceof BridgeFatalError) {
+                  cap.cleanup()
+                  logEvent('tengu_bridge_heartbeat_error', {
+                    status:
+                      err.status as unknown as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+                    error_type: (err.status === 401 || err.status === 403
+                      ? 'auth_failed'
+                      : 'fatal') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+                  })
+                  // JWT expired (401/403) or work item gone (404/410).
+                  // Either way the current transport is dead — SSE
+                  // reconnects and CCR writes will fail on the same
+                  // stale token. If the caller gave us a recovery hook,
+                  // tear down work state and skip backoff: isAtCapacity()
+                  // flips to false, next outer-loop iteration fast-polls
+                  // for the server's re-dispatched work item. Without
+                  // the hook, backoff to avoid tight poll+heartbeat loop.
+                  if (onHeartbeatFatal) {
+                    onHeartbeatFatal(err)
+                    logForDebugging(
+                      `[bridge:repl:heartbeat] Fatal (status=${err.status}), work state cleared — fast-polling for re-dispatch`,
+                    )
+                  } else {
+                    needsBackoff = true
+                  }
+                  break
+                }
+              }
+
+              hbCycles++
+              await sleep(
+                hbConfig.non_exclusive_heartbeat_interval_ms,
+                cap.signal,
+              )
+              cap.cleanup()
+            }
+
+            const exitReason = needsBackoff
+              ? 'error'
+              : signal.aborted
+                ? 'shutdown'
+                : !isAtCapacity()
+                  ? 'capacity_changed'
+                  : pollDeadline !== null && Date.now() >= pollDeadline
+                    ? 'poll_due'
+                    : 'config_disabled'
+            logEvent('tengu_bridge_heartbeat_mode_exited', {
+              reason:
+                exitReason as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              heartbeat_cycles: hbCycles,
+            })
+
+            // On auth_failed or fatal, backoff before polling to avoid a
+            // tight poll+heartbeat loop. Fall through to the shared sleep
+            // below — it's the same capacitySignal-wrapped sleep the legacy
+            // path uses, and both need the suspension-overrun check.
+            if (!needsBackoff) {
+              if (exitReason === 'poll_due') {
+                // bridgeApi throttles empty-poll logs (EMPTY_POLL_LOG_INTERVAL=100)
+                // so the once-per-10min poll_due poll is invisible at counter=2.
+                // Log it here so verification runs see both endpoints in the debug log.
+                logForDebugging(
+                  `[bridge:repl] Heartbeat poll_due after ${hbCycles} cycles — falling through to pollForWork`,
+                )
+              }
+              continue
+            }
+          }
+          // At-capacity sleep — reached by both the legacy path (heartbeat
+          // disabled) and the heartbeat-backoff path (needsBackoff=true).
+          // Merged so the suspension detector covers both; previously the
+          // backoff path had no overrun check and could go straight back
+          // under for 10 min after a laptop wake. Use atCapMs when enabled,
+          // else the heartbeat interval as a floor (guaranteed > 0 on the
+          // backoff path) so heartbeat-only configs don't tight-loop.
+          const sleepMs =
+            atCapMs > 0
+              ? atCapMs
+              : pollConfig.non_exclusive_heartbeat_interval_ms
+          if (sleepMs > 0) {
+            const cap = capacitySignal()
+            const sleepStart = Date.now()
+            await sleep(sleepMs, cap.signal)
+            cap.cleanup()
+            // Process-suspension detector. A setTimeout overshooting its
+            // deadline by 60s means the process was suspended (laptop lid,
+            // SIGSTOP, VM pause) — even a pathological GC pause is seconds,
+            // not minutes. Early aborts (wakePollLoop → cap.signal) produce
+            // overrun < 0 and fall through. Note: this only catches sleeps
+            // that outlast their deadline; WebSocketTransport's ping
+            // interval (10s granularity) is the primary detector for shorter
+            // suspensions. This is the backstop for when that detector isn't
+            // running (transport mid-reconnect, interval stopped).
+            const overrun = Date.now() - sleepStart - sleepMs
+            if (overrun > 60_000) {
+              logForDebugging(
+                `[bridge:repl] At-capacity sleep overran by ${Math.round(overrun / 1000)}s — process suspension detected, forcing one fast-poll cycle`,
+              )
+              logEvent('tengu_bridge_repl_suspension_detected', {
+                overrun_ms: overrun,
+              })
+              suspensionDetected = true
+            }
+          }
+        } else {
+          await sleep(pollConfig.poll_interval_ms_not_at_capacity, signal)
+        }
+        continue
+      }
+
+      // Decode before type dispatch — need the JWT for the explicit ack.
+      let secret
+      try {
+        secret = decodeWorkSecret(work.secret)
+      } catch (err) {
+        logForDebugging(
+          `[bridge:repl] Failed to decode work secret: ${errorMessage(err)}`,
+        )
+        logEvent('tengu_bridge_repl_work_secret_failed', {})
+        // Can't ack (needs the JWT we failed to decode). stopWork uses OAuth.
+        // Prevents XAUTOCLAIM re-delivering this poisoned item every cycle.
+        await api.stopWork(envId, work.id, false).catch(() => {})
+        continue
+      }
+
+      // Explicitly acknowledge to prevent redelivery. Non-fatal on failure:
+      // server re-delivers, and the onWorkReceived callback handles dedup.
+      logForDebugging(`[bridge:repl] Acknowledging workId=${work.id}`)
+      try {
+        await api.acknowledgeWork(envId, work.id, secret.session_ingress_token)
+      } catch (err) {
+        logForDebugging(
+          `[bridge:repl] Acknowledge failed workId=${work.id}: ${errorMessage(err)}`,
+        )
+      }
+
+      if (work.data.type === 'healthcheck') {
+        logForDebugging('[bridge:repl] Healthcheck received')
+        continue
+      }
+
+      if (work.data.type === 'session') {
+        const workSessionId = work.data.id
+        try {
+          validateBridgeId(workSessionId, 'session_id')
+        } catch {
+          logForDebugging(
+            `[bridge:repl] Invalid session_id in work: ${workSessionId}`,
+          )
+          continue
+        }
+
+        onWorkReceived(
+          workSessionId,
+          secret.session_ingress_token,
+          work.id,
+          secret.use_code_sessions === true,
+        )
+        logForDebugging('[bridge:repl] Work accepted, continuing poll loop')
+      }
+    } catch (err) {
+      if (signal.aborted) break
+
+      // Detect permanent "environment deleted" error — no amount of
+      // retrying will recover. Re-register a new environment instead.
+      // Checked BEFORE the generic BridgeFatalError bail. pollForWork uses
+      // validateStatus: s => s < 500, so 404 is always wrapped into a
+      // BridgeFatalError by handleErrorStatus() — never an axios-shaped
+      // error. The poll endpoint's only path param is the env ID; 404
+      // unambiguously means env-gone (no-work is a 200 with null body).
+      // The server sends error.type='not_found_error' (standard Anthropic
+      // API shape), not a bridge-specific string — but status===404 is
+      // the real signal and survives body-shape changes.
+      if (
+        err instanceof BridgeFatalError &&
+        err.status === 404 &&
+        onEnvironmentLost
+      ) {
+        // If credentials have already been refreshed by a concurrent
+        // reconnection (e.g. WS close handler), the stale poll's error
+        // is expected — skip onEnvironmentLost and retry with fresh creds.
+        const currentEnvId = getCredentials().environmentId
+        if (envId !== currentEnvId) {
+          logForDebugging(
+            `[bridge:repl] Stale poll error for old env=${envId}, current env=${currentEnvId} — skipping onEnvironmentLost`,
+          )
+          consecutiveErrors = 0
+          firstErrorTime = null
+          continue
+        }
+
+        environmentRecreations++
+        logForDebugging(
+          `[bridge:repl] Environment deleted, attempting re-registration (attempt ${environmentRecreations}/${MAX_ENVIRONMENT_RECREATIONS})`,
+        )
+        logEvent('tengu_bridge_repl_env_lost', {
+          attempt: environmentRecreations,
+        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+
+        if (environmentRecreations > MAX_ENVIRONMENT_RECREATIONS) {
+          logForDebugging(
+            `[bridge:repl] Environment re-registration limit reached (${MAX_ENVIRONMENT_RECREATIONS}), giving up`,
+          )
+          onStateChange?.(
+            'failed',
+            'Environment deleted and re-registration limit reached',
+          )
+          onFatalError?.()
+          break
+        }
+
+        onStateChange?.('reconnecting', 'environment lost, recreating session')
+        const newCreds = await onEnvironmentLost()
+        // doReconnect() makes several sequential network calls (1-5s).
+        // If the user triggered teardown during that window, its internal
+        // abort checks return false — but we need to re-check here to
+        // avoid emitting a spurious 'failed' + onFatalError() during
+        // graceful shutdown.
+        if (signal.aborted) break
+        if (newCreds) {
+          // Credentials are updated in the outer scope via
+          // reconnectEnvironmentWithSession — getCredentials() will
+          // return the fresh values on the next poll iteration.
+          // Do NOT reset environmentRecreations here — onEnvLost returning
+          // creds only proves we tried to fix it, not that the env is
+          // healthy. A successful poll (above) is the reset point; if the
+          // new env immediately dies again we still want the limit to fire.
+          consecutiveErrors = 0
+          firstErrorTime = null
+          onStateChange?.('ready')
+          logForDebugging(
+            `[bridge:repl] Re-registered environment: ${newCreds.environmentId}`,
+          )
+          continue
+        }
+
+        onStateChange?.(
+          'failed',
+          'Environment deleted and re-registration failed',
+        )
+        onFatalError?.()
+        break
+      }
+
+      // Fatal errors (401/403/404/410) — no point retrying
+      if (err instanceof BridgeFatalError) {
+        const isExpiry = isExpiredErrorType(err.errorType)
+        const isSuppressible = isSuppressible403(err)
+        logForDebugging(
+          `[bridge:repl] Fatal poll error: ${err.message} (status=${err.status}, type=${err.errorType ?? 'unknown'})${isSuppressible ? ' (suppressed)' : ''}`,
+        )
+        logEvent('tengu_bridge_repl_fatal_error', {
+          status: err.status,
+          error_type:
+            err.errorType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        })
+        logForDiagnosticsNoPII(
+          isExpiry ? 'info' : 'error',
+          'bridge_repl_fatal_error',
+          { status: err.status, error_type: err.errorType },
+        )
+        // Cosmetic 403 errors (e.g., external_poll_sessions scope,
+        // environments:manage permission) — suppress user-visible error
+        // but always trigger teardown so cleanup runs.
+        if (!isSuppressible) {
+          onStateChange?.(
+            'failed',
+            isExpiry
+              ? 'session expired · /remote-control to reconnect'
+              : err.message,
+          )
+        }
+        // Always trigger teardown — matches bridgeMain.ts where fatalExit=true
+        // is unconditional and post-loop cleanup always runs.
+        onFatalError?.()
+        break
+      }
+
+      const now = Date.now()
+
+      // Detect system sleep/wake: if the gap since the last poll error
+      // greatly exceeds the max backoff delay, the machine likely slept.
+      // Reset error tracking so we retry with a fresh budget instead of
+      // immediately giving up.
+      if (
+        lastPollErrorTime !== null &&
+        now - lastPollErrorTime > POLL_ERROR_MAX_DELAY_MS * 2
+      ) {
+        logForDebugging(
+          `[bridge:repl] Detected system sleep (${Math.round((now - lastPollErrorTime) / 1000)}s gap), resetting poll error budget`,
+        )
+        logForDiagnosticsNoPII('info', 'bridge_repl_poll_sleep_detected', {
+          gapMs: now - lastPollErrorTime,
+        })
+        consecutiveErrors = 0
+        firstErrorTime = null
+      }
+      lastPollErrorTime = now
+
+      consecutiveErrors++
+      if (firstErrorTime === null) {
+        firstErrorTime = now
+      }
+      const elapsed = now - firstErrorTime
+      const httpStatus = extractHttpStatus(err)
+      const errMsg = describeAxiosError(err)
+      const wsLabel = getWsState?.() ?? 'unknown'
+
+      logForDebugging(
+        `[bridge:repl] Poll error (attempt ${consecutiveErrors}, elapsed ${Math.round(elapsed / 1000)}s, ws=${wsLabel}): ${errMsg}`,
+      )
+      logEvent('tengu_bridge_repl_poll_error', {
+        status: httpStatus,
+        consecutiveErrors,
+        elapsedMs: elapsed,
+      } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+
+      // Only transition to 'reconnecting' on the first error — stay
+      // there until a successful poll (avoid flickering the UI state).
+      if (consecutiveErrors === 1) {
+        onStateChange?.('reconnecting', errMsg)
+      }
+
+      // Give up after continuous failures
+      if (elapsed >= POLL_ERROR_GIVE_UP_MS) {
+        logForDebugging(
+          `[bridge:repl] Poll failures exceeded ${POLL_ERROR_GIVE_UP_MS / 1000}s (${consecutiveErrors} errors), giving up`,
+        )
+        logForDiagnosticsNoPII('info', 'bridge_repl_poll_give_up')
+        logEvent('tengu_bridge_repl_poll_give_up', {
+          consecutiveErrors,
+          elapsedMs: elapsed,
+          lastStatus: httpStatus,
+        } as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+        onStateChange?.('failed', 'connection to server lost')
+        break
+      }
+
+      // Exponential backoff: 2s → 4s → 8s → 16s → 32s → 60s (cap)
+      const backoff = Math.min(
+        POLL_ERROR_INITIAL_DELAY_MS * 2 ** (consecutiveErrors - 1),
+        POLL_ERROR_MAX_DELAY_MS,
+      )
+      // The poll_due heartbeat-loop exit leaves a healthy lease exposed to
+      // this backoff path. Heartbeat before each sleep so /poll outages
+      // (the VerifyEnvironmentSecretAuth DB path heartbeat was introduced to
+      // avoid) don't kill the 300s lease TTL.
+      if (getPollIntervalConfig().non_exclusive_heartbeat_interval_ms > 0) {
+        const info = getHeartbeatInfo?.()
+        if (info) {
+          try {
+            await api.heartbeatWork(
+              info.environmentId,
+              info.workId,
+              info.sessionToken,
+            )
+          } catch {
+            // Best-effort — if heartbeat also fails the lease dies, same as
+            // pre-poll_due behavior (where the only heartbeat-loop exits were
+            // ones where the lease was already dying).
+          }
+        }
+      }
+      await sleep(backoff, signal)
+    }
+  }
+
+  logForDebugging(
+    `[bridge:repl] Work poll loop ended (aborted=${signal.aborted}) env=${getCredentials().environmentId}`,
+  )
+}
+
// Exported for testing only — the `_ForTesting` aliases let unit tests drive
// the poll loop directly and assert against the backoff/give-up timing
// constants without widening this module's public API surface.
export {
  startWorkPollLoop as _startWorkPollLoopForTesting,
  POLL_ERROR_INITIAL_DELAY_MS as _POLL_ERROR_INITIAL_DELAY_MS_ForTesting,
  POLL_ERROR_MAX_DELAY_MS as _POLL_ERROR_MAX_DELAY_MS_ForTesting,
  POLL_ERROR_GIVE_UP_MS as _POLL_ERROR_GIVE_UP_MS_ForTesting,
}

+ 36 - 0
src/bridge/replBridgeHandle.ts

@@ -0,0 +1,36 @@
+import { updateSessionBridgeId } from '../utils/concurrentSessions.js'
+import type { ReplBridgeHandle } from './replBridge.js'
+import { toCompatSessionId } from './sessionIdCompat.js'
+
+/**
+ * Global pointer to the active REPL bridge handle, so callers outside
+ * useReplBridge's React tree (tools, slash commands) can invoke handle methods
+ * like subscribePR. Same one-bridge-per-process justification as bridgeDebug.ts
+ * — the handle's closure captures the sessionId and getAccessToken that created
+ * the session, and re-deriving those independently (BriefTool/upload.ts pattern)
+ * risks staging/prod token divergence.
+ *
+ * Set from useReplBridge.tsx when init completes; cleared on teardown.
+ */
+
+let handle: ReplBridgeHandle | null = null
+
+export function setReplBridgeHandle(h: ReplBridgeHandle | null): void {
+  handle = h
+  // Publish (or clear) our bridge session ID in the session record so other
+  // local peers can dedup us out of their bridge list — local is preferred.
+  void updateSessionBridgeId(getSelfBridgeCompatId() ?? null).catch(() => {})
+}
+
+export function getReplBridgeHandle(): ReplBridgeHandle | null {
+  return handle
+}
+
+/**
+ * Our own bridge session ID in the session_* compat format the API returns
+ * in /v1/sessions responses — or undefined if bridge isn't connected.
+ */
+export function getSelfBridgeCompatId(): string | undefined {
+  const h = getReplBridgeHandle()
+  return h ? toCompatSessionId(h.bridgeSessionId) : undefined
+}

+ 370 - 0
src/bridge/replBridgeTransport.ts

@@ -0,0 +1,370 @@
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { CCRClient } from '../cli/transports/ccrClient.js'
+import type { HybridTransport } from '../cli/transports/HybridTransport.js'
+import { SSETransport } from '../cli/transports/SSETransport.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { updateSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
+import type { SessionState } from '../utils/sessionState.js'
+import { registerWorker } from './workSecret.js'
+
/**
 * Transport abstraction for replBridge. Covers exactly the surface that
 * replBridge.ts uses against HybridTransport so the v1/v2 choice is
 * confined to the construction site.
 *
 * - v1: HybridTransport (WS reads + POST writes to Session-Ingress)
 * - v2: SSETransport (reads) + CCRClient (writes to CCR v2 /worker/*)
 *
 * The v2 write path goes through CCRClient.writeEvent → SerialBatchEventUploader,
 * NOT through SSETransport.write() — SSETransport.write() targets the
 * Session-Ingress POST URL shape, which is wrong for CCR v2.
 */
export type ReplBridgeTransport = {
  /** Send one outbound message on the write path. */
  write(message: StdoutMessage): Promise<void>
  /** Send an ordered batch of outbound messages. */
  writeBatch(messages: StdoutMessage[]): Promise<void>
  /** Tear down read and write resources. */
  close(): void
  /** Write-path readiness (adapters may report this independently of reads). */
  isConnectedStatus(): boolean
  /** Human-readable connection state — used for debug logging only. */
  getStateLabel(): string
  /** Register the inbound-data callback; wire before calling connect(). */
  setOnData(callback: (data: string) => void): void
  /** Register the close callback; closeCode distinguishes close causes. */
  setOnClose(callback: (closeCode?: number) => void): void
  /** Register the callback fired when the transport becomes write-ready. */
  setOnConnect(callback: () => void): void
  /** Begin connecting; callbacks registered above must already be wired. */
  connect(): void
  /**
   * High-water mark of the underlying read stream's event sequence numbers.
   * replBridge reads this before swapping transports so the new one can
   * resume from where the old one left off (otherwise the server replays
   * the entire session history from seq 0).
   *
   * v1 returns 0 — Session-Ingress WS doesn't use SSE sequence numbers;
   * replay-on-reconnect is handled by the server-side message cursor.
   */
  getLastSequenceNum(): number
  /**
   * Monotonic count of batches dropped via maxConsecutiveFailures.
   * Snapshot before writeBatch() and compare after to detect silent drops
   * (writeBatch() resolves normally even when batches were dropped).
   * v2 returns 0 — the v2 write path doesn't set maxConsecutiveFailures.
   */
  readonly droppedBatchCount: number
  /**
   * PUT /worker state (v2 only; v1 is a no-op). `requires_action` tells
   * the backend a permission prompt is pending — claude.ai shows the
   * "waiting for input" indicator. REPL/daemon callers don't need this
   * (user watches the REPL locally); multi-session worker callers do.
   */
  reportState(state: SessionState): void
  /** PUT /worker external_metadata (v2 only; v1 is a no-op). */
  reportMetadata(metadata: Record<string, unknown>): void
  /**
   * POST /worker/events/{id}/delivery (v2 only; v1 is a no-op). Populates
   * CCR's processing_at/processed_at columns. `received` is auto-fired by
   * CCRClient on every SSE frame and is not exposed here.
   */
  reportDelivery(eventId: string, status: 'processing' | 'processed'): void
  /**
   * Drain the write queue before close() (v2 only; v1 resolves
   * immediately — HybridTransport POSTs are already awaited per-write).
   */
  flush(): Promise<void>
}
+
+/**
+ * v1 adapter: HybridTransport already has the full surface (it extends
+ * WebSocketTransport which has setOnConnect + getStateLabel). This is a
+ * no-op wrapper that exists only so replBridge's `transport` variable
+ * has a single type.
+ */
+export function createV1ReplTransport(
+  hybrid: HybridTransport,
+): ReplBridgeTransport {
+  return {
+    write: msg => hybrid.write(msg),
+    writeBatch: msgs => hybrid.writeBatch(msgs),
+    close: () => hybrid.close(),
+    isConnectedStatus: () => hybrid.isConnectedStatus(),
+    getStateLabel: () => hybrid.getStateLabel(),
+    setOnData: cb => hybrid.setOnData(cb),
+    setOnClose: cb => hybrid.setOnClose(cb),
+    setOnConnect: cb => hybrid.setOnConnect(cb),
+    connect: () => void hybrid.connect(),
+    // v1 Session-Ingress WS doesn't use SSE sequence numbers; replay
+    // semantics are different. Always return 0 so the seq-num carryover
+    // logic in replBridge is a no-op for v1.
+    getLastSequenceNum: () => 0,
+    get droppedBatchCount() {
+      return hybrid.droppedBatchCount
+    },
+    reportState: () => {},
+    reportMetadata: () => {},
+    reportDelivery: () => {},
+    flush: () => Promise.resolve(),
+  }
+}
+
+/**
+ * v2 adapter: wrap SSETransport (reads) + CCRClient (writes, heartbeat,
+ * state, delivery tracking).
+ *
+ * Auth: v2 endpoints validate the JWT's session_id claim (register_worker.go:32)
+ * and worker role (environment_auth.py:856). OAuth tokens have neither.
+ * This is the inverse of the v1 replBridge path, which deliberately uses OAuth.
+ * The JWT is refreshed when the poll loop re-dispatches work — the caller
+ * invokes createV2ReplTransport again with the fresh token.
+ *
+ * Registration happens here (not in the caller) so the entire v2 handshake
+ * is one async step. registerWorker failure propagates — replBridge will
+ * catch it and stay on the poll loop.
+ */
export async function createV2ReplTransport(opts: {
  sessionUrl: string
  ingressToken: string
  sessionId: string
  /**
   * SSE sequence-number high-water mark from the previous transport.
   * Passed to the new SSETransport so its first connect() sends
   * from_sequence_num / Last-Event-ID and the server resumes from where
   * the old stream left off. Without this, every transport swap asks the
   * server to replay the entire session history from seq 0.
   */
  initialSequenceNum?: number
  /**
   * Worker epoch from POST /bridge response. When provided, the server
   * already bumped epoch (the /bridge call IS the register — see server
   * PR #293280). When omitted (v1 CCR-v2 path via replBridge.ts poll loop),
   * call registerWorker as before.
   */
  epoch?: number
  /** CCRClient heartbeat interval. Defaults to 20s when omitted. */
  heartbeatIntervalMs?: number
  /** ±fraction per-beat jitter. Defaults to 0 (no jitter) when omitted. */
  heartbeatJitterFraction?: number
  /**
   * When true, skip opening the SSE read stream — only the CCRClient write
   * path is activated. Use for mirror-mode attachments that forward events
   * but never receive inbound prompts or control requests.
   */
  outboundOnly?: boolean
  /**
   * Per-instance auth header source. When provided, CCRClient + SSETransport
   * read auth from this closure instead of the process-wide
   * CLAUDE_CODE_SESSION_ACCESS_TOKEN env var. Required for callers managing
   * multiple concurrent sessions — the env-var path stomps across sessions.
   * When omitted, falls back to the env var (single-session callers).
   */
  getAuthToken?: () => string | undefined
}): Promise<ReplBridgeTransport> {
  const {
    sessionUrl,
    ingressToken,
    sessionId,
    initialSequenceNum,
    getAuthToken,
  } = opts

  // Auth header builder. If getAuthToken is provided, read from it
  // (per-instance, multi-session safe). Otherwise write ingressToken to
  // the process-wide env var (legacy single-session path — CCRClient's
  // default getAuthHeaders reads it via getSessionIngressAuthHeaders).
  let getAuthHeaders: (() => Record<string, string>) | undefined
  if (getAuthToken) {
    getAuthHeaders = (): Record<string, string> => {
      const token = getAuthToken()
      // No token yet → send no auth header rather than "Bearer undefined".
      if (!token) return {}
      return { Authorization: `Bearer ${token}` }
    }
  } else {
    // CCRClient.request() and SSETransport.connect() both read auth via
    // getSessionIngressAuthHeaders() → this env var. Set it before either
    // touches the network.
    updateSessionIngressAuthToken(ingressToken)
  }

  // Registration happens here (not in the caller) so the v2 handshake is one
  // async step; registerWorker failure propagates to the caller.
  const epoch = opts.epoch ?? (await registerWorker(sessionUrl, ingressToken))
  logForDebugging(
    `[bridge:repl] CCR v2: worker sessionId=${sessionId} epoch=${epoch}${opts.epoch !== undefined ? ' (from /bridge)' : ' (via registerWorker)'}`,
  )

  // Derive SSE stream URL. Same logic as transportUtils.ts:26-33 but
  // starting from an http(s) base instead of a --sdk-url that might be ws://.
  const sseUrl = new URL(sessionUrl)
  sseUrl.pathname = sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'

  const sse = new SSETransport(
    sseUrl,
    {},
    sessionId,
    undefined,
    initialSequenceNum,
    getAuthHeaders,
  )
  // Captured here so the failure paths below (epoch mismatch, init failure)
  // can notify replBridge even though setOnClose() is wired after construction.
  let onCloseCb: ((closeCode?: number) => void) | undefined
  const ccr = new CCRClient(sse, new URL(sessionUrl), {
    getAuthHeaders,
    heartbeatIntervalMs: opts.heartbeatIntervalMs,
    heartbeatJitterFraction: opts.heartbeatJitterFraction,
    // Default is process.exit(1) — correct for spawn-mode children. In-process,
    // that kills the REPL. Close instead: replBridge's onClose wakes the poll
    // loop, which picks up the server's re-dispatch (with fresh epoch).
    onEpochMismatch: () => {
      logForDebugging(
        '[bridge:repl] CCR v2: epoch superseded (409) — closing for poll-loop recovery',
      )
      // Close resources in a try block so the throw always executes.
      // If ccr.close() or sse.close() throw, we still need to unwind
      // the caller (request()) — otherwise handleEpochMismatch's `never`
      // return type is violated at runtime and control falls through.
      try {
        ccr.close()
        sse.close()
        // 4090 = epoch superseded, distinguishable from 4091/4092 below.
        onCloseCb?.(4090)
      } catch (closeErr: unknown) {
        logForDebugging(
          `[bridge:repl] CCR v2: error during epoch-mismatch cleanup: ${errorMessage(closeErr)}`,
          { level: 'error' },
        )
      }
      // Don't return — the calling request() code continues after the 409
      // branch, so callers see the logged warning and a false return. We
      // throw to unwind; the uploaders catch it as a send failure.
      throw new Error('epoch superseded')
    },
  })

  // CCRClient's constructor wired sse.setOnEvent → reportDelivery('received').
  // remoteIO.ts additionally sends 'processing'/'processed' via
  // setCommandLifecycleListener, which the in-process query loop fires. This
  // transport's only caller (replBridge/daemonBridge) has no such wiring — the
  // daemon's agent child is a separate process (ProcessTransport), and its
  // notifyCommandLifecycle calls fire with listener=null in its own module
  // scope. So events stay at 'received' forever, and reconnectSession re-queues
  // them on every daemon restart (observed: 21→24→25 phantom prompts as
  // "user sent a new message while you were working" system-reminders).
  //
  // Fix: ACK 'processed' immediately alongside 'received'. The window between
  // SSE receipt and transcript-write is narrow (queue → SDK → child stdin →
  // model); a crash there loses one prompt vs. the observed N-prompt flood on
  // every restart. Overwrite the constructor's wiring to do both — setOnEvent
  // replaces, not appends (SSETransport.ts:658).
  sse.setOnEvent(event => {
    ccr.reportDelivery(event.event_id, 'received')
    ccr.reportDelivery(event.event_id, 'processed')
  })

  // Both sse.connect() and ccr.initialize() are deferred to connect() below.
  // replBridge's calling order is newTransport → setOnConnect → setOnData →
  // setOnClose → connect(), and both calls need those callbacks wired first:
  // sse.connect() opens the stream (events flow to onData/onClose immediately),
  // and ccr.initialize().then() fires onConnectCb.
  //
  // onConnect fires once ccr.initialize() resolves. Writes go via
  // CCRClient HTTP POST (SerialBatchEventUploader), not SSE, so the
  // write path is ready the moment workerEpoch is set. SSE.connect()
  // awaits its read loop and never resolves — don't gate on it.
  // The SSE stream opens in parallel (~30ms) and starts delivering
  // inbound events via setOnData; outbound doesn't need to wait for it.
  let onConnectCb: (() => void) | undefined
  let ccrInitialized = false
  let closed = false

  // The returned adapter closes over sse/ccr and the mutable flags above.
  return {
    // Single-event write — goes through CCRClient's serial uploader, not SSE.
    write(msg) {
      return ccr.writeEvent(msg)
    },
    async writeBatch(msgs) {
      // SerialBatchEventUploader already batches internally (maxBatchSize=100);
      // sequential enqueue preserves order and the uploader coalesces.
      // Check closed between writes to avoid sending partial batches after
      // transport teardown (epoch mismatch, SSE drop).
      for (const m of msgs) {
        if (closed) break
        await ccr.writeEvent(m)
      }
    },
    close() {
      closed = true
      ccr.close()
      sse.close()
    },
    isConnectedStatus() {
      // Write-readiness, not read-readiness — replBridge checks this
      // before calling writeBatch. SSE open state is orthogonal.
      return ccrInitialized
    },
    getStateLabel() {
      // SSETransport doesn't expose its state string; synthesize from
      // what we can observe. replBridge only uses this for debug logging.
      if (sse.isClosedStatus()) return 'closed'
      if (sse.isConnectedStatus()) return ccrInitialized ? 'connected' : 'init'
      return 'connecting'
    },
    setOnData(cb) {
      sse.setOnData(cb)
    },
    setOnClose(cb) {
      onCloseCb = cb
      // SSE reconnect-budget exhaustion fires onClose(undefined) — map to
      // 4092 so ws_closed telemetry can distinguish it from HTTP-status
      // closes (SSETransport:280 passes response.status). Stop CCRClient's
      // heartbeat timer before notifying replBridge. (sse.close() doesn't
      // invoke this, so the epoch-mismatch path above isn't double-firing.)
      sse.setOnClose(code => {
        ccr.close()
        cb(code ?? 4092)
      })
    },
    setOnConnect(cb) {
      onConnectCb = cb
    },
    getLastSequenceNum() {
      return sse.getLastSequenceNum()
    },
    // v2 write path (CCRClient) doesn't set maxConsecutiveFailures — no drops.
    droppedBatchCount: 0,
    reportState(state) {
      ccr.reportState(state)
    },
    reportMetadata(metadata) {
      ccr.reportMetadata(metadata)
    },
    reportDelivery(eventId, status) {
      ccr.reportDelivery(eventId, status)
    },
    flush() {
      return ccr.flush()
    },
    connect() {
      // Outbound-only: skip the SSE read stream entirely — no inbound
      // events to receive, no delivery ACKs to send. Only the CCRClient
      // write path (POST /worker/events) and heartbeat are needed.
      if (!opts.outboundOnly) {
        // Fire-and-forget — SSETransport.connect() awaits readStream()
        // (the read loop) and only resolves on stream close/error. The
        // spawn-mode path in remoteIO.ts does the same void discard.
        void sse.connect()
      }
      void ccr.initialize(epoch).then(
        () => {
          ccrInitialized = true
          logForDebugging(
            `[bridge:repl] v2 transport ready for writes (epoch=${epoch}, sse=${sse.isConnectedStatus() ? 'open' : 'opening'})`,
          )
          onConnectCb?.()
        },
        (err: unknown) => {
          logForDebugging(
            `[bridge:repl] CCR v2 initialize failed: ${errorMessage(err)}`,
            { level: 'error' },
          )
          // Close transport resources and notify replBridge via onClose
          // so the poll loop can retry on the next work dispatch.
          // Without this callback, replBridge never learns the transport
          // failed to initialize and sits with transport === null forever.
          ccr.close()
          sse.close()
          onCloseCb?.(4091) // 4091 = init failure, distinguishable from 4090 epoch mismatch
        },
      )
    },
  }
}

+ 57 - 0
src/bridge/sessionIdCompat.ts

@@ -0,0 +1,57 @@
+/**
+ * Session ID tag translation helpers for the CCR v2 compat layer.
+ *
+ * Lives in its own file (rather than workSecret.ts) so that sessionHandle.ts
+ * and replBridgeTransport.ts (bridge.mjs entry points) can import from
+ * workSecret.ts without pulling in these retag functions.
+ *
+ * The isCseShimEnabled kill switch is injected via setCseShimGate() to avoid
+ * a static import of bridgeEnabled.ts → growthbook.ts → config.ts — all
+ * banned from the sdk.mjs bundle (scripts/build-agent-sdk.sh). Callers that
+ * already import bridgeEnabled.ts register the gate; the SDK path never does,
+ * so the shim defaults to active (matching isCseShimEnabled()'s own default).
+ */
+
/** Injected GrowthBook gate for the cse_ shim; undefined until setCseShimGate() runs. */
let _isCseShimEnabled: (() => boolean) | undefined

/**
 * Register the GrowthBook gate for the cse_ shim. Called from bridge
 * init code that already imports bridgeEnabled.ts.
 *
 * When never called (e.g. the SDK bundle), the retag functions below treat
 * the shim as active — see toCompatSessionId.
 */
export function setCseShimGate(gate: () => boolean): void {
  _isCseShimEnabled = gate
}
+
+/**
+ * Re-tag a `cse_*` session ID to `session_*` for use with the v1 compat API.
+ *
+ * Worker endpoints (/v1/code/sessions/{id}/worker/*) want `cse_*`; that's
+ * what the work poll delivers. Client-facing compat endpoints
+ * (/v1/sessions/{id}, /v1/sessions/{id}/archive, /v1/sessions/{id}/events)
+ * want `session_*` — compat/convert.go:27 validates TagSession. Same UUID,
+ * different costume. No-op for IDs that aren't `cse_*`.
+ *
+ * bridgeMain holds one sessionId variable for both worker registration and
+ * session-management calls. It arrives as `cse_*` from the work poll under
+ * the compat gate, so archiveSession/fetchSessionTitle need this re-tag.
+ */
+export function toCompatSessionId(id: string): string {
+  if (!id.startsWith('cse_')) return id
+  if (_isCseShimEnabled && !_isCseShimEnabled()) return id
+  return 'session_' + id.slice('cse_'.length)
+}
+
+/**
+ * Re-tag a `session_*` session ID to `cse_*` for infrastructure-layer calls.
+ *
+ * Inverse of toCompatSessionId. POST /v1/environments/{id}/bridge/reconnect
+ * lives below the compat layer: once ccr_v2_compat_enabled is on server-side,
+ * it looks sessions up by their infra tag (`cse_*`). createBridgeSession still
+ * returns `session_*` (compat/convert.go:41) and that's what bridge-pointer
+ * stores — so perpetual reconnect passes the wrong costume and gets "Session
+ * not found" back. Same UUID, wrong tag. No-op for IDs that aren't `session_*`.
+ */
+export function toInfraSessionId(id: string): string {
+  if (!id.startsWith('session_')) return id
+  return 'cse_' + id.slice('session_'.length)
+}

+ 550 - 0
src/bridge/sessionRunner.ts

@@ -0,0 +1,550 @@
+import { type ChildProcess, spawn } from 'child_process'
+import { createWriteStream, type WriteStream } from 'fs'
+import { tmpdir } from 'os'
+import { dirname, join } from 'path'
+import { createInterface } from 'readline'
+import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
+import { debugTruncate } from './debugUtils.js'
+import type {
+  SessionActivity,
+  SessionDoneStatus,
+  SessionHandle,
+  SessionSpawner,
+  SessionSpawnOpts,
+} from './types.js'
+
/** Max entries kept in a session's rolling `activities` buffer (shift/push ring in spawn()). */
const MAX_ACTIVITIES = 10
/** Max child stderr lines retained (ring buffer) for failure diagnostics. */
const MAX_STDERR_LINES = 10
+
+/**
+ * Sanitize a session ID for use in file names.
+ * Strips any characters that could cause path traversal (e.g. `../`, `/`)
+ * or other filesystem issues, replacing them with underscores.
+ */
+export function safeFilenameId(id: string): string {
+  return id.replace(/[^a-zA-Z0-9_-]/g, '_')
+}
+
/**
 * A control_request emitted by the child CLI when it needs permission to
 * execute a **specific** tool invocation (not a general capability check).
 * The bridge forwards this to the server so the user can approve/deny.
 */
export type PermissionRequest = {
  type: 'control_request'
  /** ID of this control request (the child tags each request it emits). */
  request_id: string
  request: {
    /** Per-invocation permission check — "may I run this tool with these inputs?" */
    subtype: 'can_use_tool'
    /** Name of the tool the child wants to invoke. */
    tool_name: string
    /** Raw tool input as emitted by the child CLI. */
    input: Record<string, unknown>
    /** The tool_use block ID this permission check corresponds to. */
    tool_use_id: string
  }
}
+
/** Injected configuration and callbacks for createSessionSpawner(). */
type SessionSpawnerDeps = {
  /** Executable to spawn (claude binary, or the node runtime for npm installs — see scriptArgs). */
  execPath: string
  /**
   * Arguments that must precede the CLI flags when spawning. Empty for
   * compiled binaries (where execPath is the claude binary itself); contains
   * the script path (process.argv[1]) for npm installs where execPath is the
   * node runtime. Without this, node sees --sdk-url as a node option and
   * exits with "bad option: --sdk-url" (see anthropics/claude-code#28334).
   */
  scriptArgs: string[]
  /** Base environment for the child; spawn() layers per-session variables on top. */
  env: NodeJS.ProcessEnv
  /** When true: child gets --verbose, and its stdout/stderr are forwarded to this process's stderr. */
  verbose: boolean
  /** When true, sets CLAUDE_CODE_FORCE_SANDBOX=1 on the child. */
  sandbox: boolean
  /** Optional debug-log path; spawn() appends a per-session suffix for uniqueness. */
  debugFile?: string
  /** Optional value forwarded to the child as --permission-mode. */
  permissionMode?: string
  /** Sink for bridge debug log lines. */
  onDebug: (msg: string) => void
  /** Invoked for each activity extracted from the child's NDJSON stdout. */
  onActivity?: (sessionId: string, activity: SessionActivity) => void
  /** Invoked when the child emits a can_use_tool control_request. */
  onPermissionRequest?: (
    sessionId: string,
    request: PermissionRequest,
    accessToken: string,
  ) => void
}
+
/**
 * Map tool names to human-readable verbs for the status display.
 * Unknown tool names fall back to the raw tool name (see toolSummary).
 * Both modern names (Read, Write, ...) and legacy *Tool suffixed names
 * are listed.
 */
const TOOL_VERBS: Record<string, string> = {
  Read: 'Reading',
  Write: 'Writing',
  Edit: 'Editing',
  MultiEdit: 'Editing',
  Bash: 'Running',
  Glob: 'Searching',
  Grep: 'Searching',
  WebFetch: 'Fetching',
  WebSearch: 'Searching',
  Task: 'Running task',
  FileReadTool: 'Reading',
  FileWriteTool: 'Writing',
  FileEditTool: 'Editing',
  GlobTool: 'Searching',
  GrepTool: 'Searching',
  BashTool: 'Running',
  NotebookEditTool: 'Editing notebook',
  LSP: 'LSP',
}
+
+function toolSummary(name: string, input: Record<string, unknown>): string {
+  const verb = TOOL_VERBS[name] ?? name
+  const target =
+    (input.file_path as string) ??
+    (input.filePath as string) ??
+    (input.pattern as string) ??
+    (input.command as string | undefined)?.slice(0, 60) ??
+    (input.url as string) ??
+    (input.query as string) ??
+    ''
+  if (target) {
+    return `${verb} ${target}`
+  }
+  return verb
+}
+
/**
 * Parse one NDJSON line of child-CLI stdout into zero or more
 * SessionActivity entries. Only 'assistant' messages (tool_use / text
 * content blocks) and 'result' messages produce activities; non-JSON lines
 * and every other message type yield an empty array. Each extracted
 * activity is also reported through onDebug.
 */
function extractActivities(
  line: string,
  sessionId: string,
  onDebug: (msg: string) => void,
): SessionActivity[] {
  let parsed: unknown
  try {
    parsed = jsonParse(line)
  } catch {
    // Non-JSON output (e.g. stray prints) — not an activity.
    return []
  }

  if (!parsed || typeof parsed !== 'object') {
    return []
  }

  const msg = parsed as Record<string, unknown>
  const activities: SessionActivity[] = []
  // One shared timestamp for all activities extracted from this line.
  const now = Date.now()

  switch (msg.type) {
    case 'assistant': {
      const message = msg.message as Record<string, unknown> | undefined
      if (!message) break
      const content = message.content
      if (!Array.isArray(content)) break

      // Each tool_use block becomes a 'tool_start' activity; each non-empty
      // text block becomes a 'text' activity truncated to 80 chars.
      for (const block of content) {
        if (!block || typeof block !== 'object') continue
        const b = block as Record<string, unknown>

        if (b.type === 'tool_use') {
          const name = (b.name as string) ?? 'Tool'
          const input = (b.input as Record<string, unknown>) ?? {}
          const summary = toolSummary(name, input)
          activities.push({
            type: 'tool_start',
            summary,
            timestamp: now,
          })
          onDebug(
            `[bridge:activity] sessionId=${sessionId} tool_use name=${name} ${inputPreview(input)}`,
          )
        } else if (b.type === 'text') {
          const text = (b.text as string) ?? ''
          if (text.length > 0) {
            activities.push({
              type: 'text',
              summary: text.slice(0, 80),
              timestamp: now,
            })
            onDebug(
              `[bridge:activity] sessionId=${sessionId} text "${text.slice(0, 100)}"`,
            )
          }
        }
      }
      break
    }
    case 'result': {
      // subtype 'success' → a 'result' activity; any other non-empty subtype
      // → an 'error' activity, summarized by msg.errors[0] when present.
      const subtype = msg.subtype as string | undefined
      if (subtype === 'success') {
        activities.push({
          type: 'result',
          summary: 'Session completed',
          timestamp: now,
        })
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=success`,
        )
      } else if (subtype) {
        const errors = msg.errors as string[] | undefined
        const errorSummary = errors?.[0] ?? `Error: ${subtype}`
        activities.push({
          type: 'error',
          summary: errorSummary,
          timestamp: now,
        })
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=${subtype} error="${errorSummary}"`,
        )
      } else {
        // Missing subtype: log it but emit no activity.
        onDebug(
          `[bridge:activity] sessionId=${sessionId} result subtype=undefined`,
        )
      }
      break
    }
    default:
      break
  }

  return activities
}
+
+/**
+ * Extract plain text from a replayed SDKUserMessage NDJSON line. Returns the
+ * trimmed text if this looks like a real human-authored message, otherwise
+ * undefined so the caller keeps waiting for the first real message.
+ */
+function extractUserMessageText(
+  msg: Record<string, unknown>,
+): string | undefined {
+  // Skip tool-result user messages (wrapped subagent results) and synthetic
+  // caveat messages — neither is human-authored.
+  if (msg.parent_tool_use_id != null || msg.isSynthetic || msg.isReplay)
+    return undefined
+
+  const message = msg.message as Record<string, unknown> | undefined
+  const content = message?.content
+  let text: string | undefined
+  if (typeof content === 'string') {
+    text = content
+  } else if (Array.isArray(content)) {
+    for (const block of content) {
+      if (
+        block &&
+        typeof block === 'object' &&
+        (block as Record<string, unknown>).type === 'text'
+      ) {
+        text = (block as Record<string, unknown>).text as string | undefined
+        break
+      }
+    }
+  }
+  text = text?.trim()
+  return text ? text : undefined
+}
+
+/** Build a short preview of tool input for debug logging. */
+function inputPreview(input: Record<string, unknown>): string {
+  const parts: string[] = []
+  for (const [key, val] of Object.entries(input)) {
+    if (typeof val === 'string') {
+      parts.push(`${key}="${val.slice(0, 100)}"`)
+    }
+    if (parts.length >= 3) break
+  }
+  return parts.join(' ')
+}
+
/**
 * Build a SessionSpawner that launches one child Claude Code CLI process per
 * bridge session (stream-json over stdin/stdout) and returns a SessionHandle
 * exposing lifecycle control (kill/forceKill), raw stdin writes, access-token
 * refresh, and a rolling view of the session's recent activity.
 */
export function createSessionSpawner(deps: SessionSpawnerDeps): SessionSpawner {
  return {
    spawn(opts: SessionSpawnOpts, dir: string): SessionHandle {
      // Debug file resolution:
      // 1. If deps.debugFile is provided, use it with session ID suffix for uniqueness
      // 2. If verbose or ant build, auto-generate a temp file path
      // 3. Otherwise, no debug file
      const safeId = safeFilenameId(opts.sessionId)
      let debugFile: string | undefined
      if (deps.debugFile) {
        // Insert the session suffix before the extension when one exists.
        const ext = deps.debugFile.lastIndexOf('.')
        if (ext > 0) {
          debugFile = `${deps.debugFile.slice(0, ext)}-${safeId}${deps.debugFile.slice(ext)}`
        } else {
          debugFile = `${deps.debugFile}-${safeId}`
        }
      } else if (deps.verbose || process.env.USER_TYPE === 'ant') {
        debugFile = join(tmpdir(), 'claude', `bridge-session-${safeId}.log`)
      }

      // Transcript file: write raw NDJSON lines for post-hoc analysis.
      // Placed alongside the debug file when one is configured.
      let transcriptStream: WriteStream | null = null
      let transcriptPath: string | undefined
      if (deps.debugFile) {
        transcriptPath = join(
          dirname(deps.debugFile),
          `bridge-transcript-${safeId}.jsonl`,
        )
        // Append mode so a respawn under the same ID extends the transcript.
        transcriptStream = createWriteStream(transcriptPath, { flags: 'a' })
        transcriptStream.on('error', err => {
          deps.onDebug(
            `[bridge:session] Transcript write error: ${err.message}`,
          )
          // Null out the stream so subsequent lines skip the dead writer.
          transcriptStream = null
        })
        deps.onDebug(`[bridge:session] Transcript log: ${transcriptPath}`)
      }

      const args = [
        ...deps.scriptArgs,
        '--print',
        '--sdk-url',
        opts.sdkUrl,
        '--session-id',
        opts.sessionId,
        '--input-format',
        'stream-json',
        '--output-format',
        'stream-json',
        '--replay-user-messages',
        ...(deps.verbose ? ['--verbose'] : []),
        ...(debugFile ? ['--debug-file', debugFile] : []),
        ...(deps.permissionMode
          ? ['--permission-mode', deps.permissionMode]
          : []),
      ]

      const env: NodeJS.ProcessEnv = {
        ...deps.env,
        // Strip the bridge's OAuth token so the child CC process uses
        // the session access token for inference instead.
        CLAUDE_CODE_OAUTH_TOKEN: undefined,
        CLAUDE_CODE_ENVIRONMENT_KIND: 'bridge',
        ...(deps.sandbox && { CLAUDE_CODE_FORCE_SANDBOX: '1' }),
        CLAUDE_CODE_SESSION_ACCESS_TOKEN: opts.accessToken,
        // v1: HybridTransport (WS reads + POST writes) to Session-Ingress.
        // Harmless in v2 mode — transportUtils checks CLAUDE_CODE_USE_CCR_V2 first.
        CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2: '1',
        // v2: SSETransport + CCRClient to CCR's /v1/code/sessions/* endpoints.
        // Same env vars environment-manager sets in the container path.
        ...(opts.useCcrV2 && {
          CLAUDE_CODE_USE_CCR_V2: '1',
          CLAUDE_CODE_WORKER_EPOCH: String(opts.workerEpoch),
        }),
      }

      deps.onDebug(
        `[bridge:session] Spawning sessionId=${opts.sessionId} sdkUrl=${opts.sdkUrl} accessToken=${opts.accessToken ? 'present' : 'MISSING'}`,
      )
      deps.onDebug(`[bridge:session] Child args: ${args.join(' ')}`)
      if (debugFile) {
        deps.onDebug(`[bridge:session] Debug log: ${debugFile}`)
      }

      // Pipe all three streams: stdin for control, stdout for NDJSON parsing,
      // stderr for error capture and diagnostics.
      const child: ChildProcess = spawn(deps.execPath, args, {
        cwd: dir,
        stdio: ['pipe', 'pipe', 'pipe'],
        env,
        windowsHide: true,
      })

      deps.onDebug(
        `[bridge:session] sessionId=${opts.sessionId} pid=${child.pid}`,
      )

      // Per-session mutable state captured by the SessionHandle closures below.
      const activities: SessionActivity[] = []
      let currentActivity: SessionActivity | null = null
      const lastStderr: string[] = []
      let sigkillSent = false
      let firstUserMessageSeen = false

      // Buffer stderr for error diagnostics
      if (child.stderr) {
        const stderrRl = createInterface({ input: child.stderr })
        stderrRl.on('line', line => {
          // Forward stderr to bridge's stderr in verbose mode
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }
          // Ring buffer of last N lines
          if (lastStderr.length >= MAX_STDERR_LINES) {
            lastStderr.shift()
          }
          lastStderr.push(line)
        })
      }

      // Parse NDJSON from child stdout
      if (child.stdout) {
        const rl = createInterface({ input: child.stdout })
        rl.on('line', line => {
          // Write raw NDJSON to transcript file
          if (transcriptStream) {
            transcriptStream.write(line + '\n')
          }

          // Log all messages flowing from the child CLI to the bridge
          deps.onDebug(
            `[bridge:ws] sessionId=${opts.sessionId} <<< ${debugTruncate(line)}`,
          )

          // In verbose mode, forward raw output to stderr
          if (deps.verbose) {
            process.stderr.write(line + '\n')
          }

          const extracted = extractActivities(
            line,
            opts.sessionId,
            deps.onDebug,
          )
          for (const activity of extracted) {
            // Maintain ring buffer
            if (activities.length >= MAX_ACTIVITIES) {
              activities.shift()
            }
            activities.push(activity)
            currentActivity = activity

            deps.onActivity?.(opts.sessionId, activity)
          }

          // Detect control_request and replayed user messages.
          // extractActivities parses the same line but swallows parse errors
          // and skips 'user' type — re-parse here is cheap (NDJSON lines are
          // small) and keeps each path self-contained.
          {
            let parsed: unknown
            try {
              parsed = jsonParse(line)
            } catch {
              // Non-JSON line, skip detection
            }
            if (parsed && typeof parsed === 'object') {
              const msg = parsed as Record<string, unknown>

              if (msg.type === 'control_request') {
                const request = msg.request as
                  | Record<string, unknown>
                  | undefined
                if (
                  request?.subtype === 'can_use_tool' &&
                  deps.onPermissionRequest
                ) {
                  deps.onPermissionRequest(
                    opts.sessionId,
                    parsed as PermissionRequest,
                    opts.accessToken,
                  )
                }
                // interrupt is turn-level; the child handles it internally (print.ts)
              } else if (
                msg.type === 'user' &&
                !firstUserMessageSeen &&
                opts.onFirstUserMessage
              ) {
                // Only the first human-authored message fires the callback.
                const text = extractUserMessageText(msg)
                if (text) {
                  firstUserMessageSeen = true
                  opts.onFirstUserMessage(text)
                }
              }
            }
          }
        })
      }

      // Resolves exactly once when the child exits (or fails to spawn),
      // classifying the outcome by signal and exit code.
      const done = new Promise<SessionDoneStatus>(resolve => {
        child.on('close', (code, signal) => {
          // Close transcript stream on exit
          if (transcriptStream) {
            transcriptStream.end()
            transcriptStream = null
          }

          if (signal === 'SIGTERM' || signal === 'SIGINT') {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} interrupted signal=${signal} pid=${child.pid}`,
            )
            resolve('interrupted')
          } else if (code === 0) {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} completed exit_code=0 pid=${child.pid}`,
            )
            resolve('completed')
          } else {
            deps.onDebug(
              `[bridge:session] sessionId=${opts.sessionId} failed exit_code=${code} pid=${child.pid}`,
            )
            resolve('failed')
          }
        })

        child.on('error', err => {
          deps.onDebug(
            `[bridge:session] sessionId=${opts.sessionId} spawn error: ${err.message}`,
          )
          resolve('failed')
        })
      })

      const handle: SessionHandle = {
        sessionId: opts.sessionId,
        done,
        activities,
        accessToken: opts.accessToken,
        lastStderr,
        get currentActivity(): SessionActivity | null {
          return currentActivity
        },
        // Graceful stop: SIGTERM (or the platform default on Windows).
        kill(): void {
          if (!child.killed) {
            deps.onDebug(
              `[bridge:session] Sending SIGTERM to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            // On Windows, child.kill('SIGTERM') throws; use default signal.
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGTERM')
            }
          }
        },
        // Hard stop for children that ignored kill().
        forceKill(): void {
          // Use separate flag because child.killed is set when kill() is called,
          // not when the process exits. We need to send SIGKILL even after SIGTERM.
          if (!sigkillSent && child.pid) {
            sigkillSent = true
            deps.onDebug(
              `[bridge:session] Sending SIGKILL to sessionId=${opts.sessionId} pid=${child.pid}`,
            )
            if (process.platform === 'win32') {
              child.kill()
            } else {
              child.kill('SIGKILL')
            }
          }
        },
        // Raw stdin write for stream-json control/user messages; silently
        // dropped once stdin is gone.
        writeStdin(data: string): void {
          if (child.stdin && !child.stdin.destroyed) {
            deps.onDebug(
              `[bridge:ws] sessionId=${opts.sessionId} >>> ${debugTruncate(data)}`,
            )
            child.stdin.write(data)
          }
        },
        updateAccessToken(token: string): void {
          handle.accessToken = token
          // Send the fresh token to the child process via stdin. The child's
          // StructuredIO handles update_environment_variables messages by
          // setting process.env directly, so getSessionIngressAuthToken()
          // picks up the new token on the next refreshHeaders call.
          handle.writeStdin(
            jsonStringify({
              type: 'update_environment_variables',
              variables: { CLAUDE_CODE_SESSION_ACCESS_TOKEN: token },
            }) + '\n',
          )
          deps.onDebug(
            `[bridge:session] Sent token refresh via stdin for sessionId=${opts.sessionId}`,
          )
        },
      }

      return handle
    },
  }
}
+
+export { extractActivities as _extractActivitiesForTesting }

+ 210 - 0
src/bridge/trustedDevice.ts

@@ -0,0 +1,210 @@
+import axios from 'axios'
+import memoize from 'lodash-es/memoize.js'
+import { hostname } from 'os'
+import { getOauthConfig } from '../constants/oauth.js'
+import {
+  checkGate_CACHED_OR_BLOCKING,
+  getFeatureValue_CACHED_MAY_BE_STALE,
+} from '../services/analytics/growthbook.js'
+import { logForDebugging } from '../utils/debug.js'
+import { errorMessage } from '../utils/errors.js'
+import { isEssentialTrafficOnly } from '../utils/privacyLevel.js'
+import { getSecureStorage } from '../utils/secureStorage/index.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+
+/**
+ * Trusted device token source for bridge (remote-control) sessions.
+ *
+ * Bridge sessions have SecurityTier=ELEVATED on the server (CCR v2).
+ * The server gates ConnectBridgeWorker on its own flag
+ * (sessions_elevated_auth_enforcement in Anthropic Main); this CLI-side
+ * flag controls whether the CLI sends X-Trusted-Device-Token at all.
+ * Two flags so rollout can be staged: flip CLI-side first (headers
+ * start flowing, server still no-ops), then flip server-side.
+ *
+ * Enrollment (POST /auth/trusted_devices) is gated server-side by
+ * account_session.created_at < 10min, so it must happen during /login.
+ * Token is persistent (90d rolling expiry) and stored in keychain.
+ *
+ * See anthropics/anthropic#274559 (spec), #310375 (B1b tenant RPCs),
+ * #295987 (B2 Python routes), #307150 (C1' CCR v2 gate).
+ */
+
/** CLI-side GrowthBook flag controlling whether X-Trusted-Device-Token is sent at all. */
const TRUSTED_DEVICE_GATE = 'tengu_sessions_elevated_auth_enforcement'

/** Non-blocking (possibly stale-cached) read of the gate; defaults to off. */
function isGateEnabled(): boolean {
  return getFeatureValue_CACHED_MAY_BE_STALE(TRUSTED_DEVICE_GATE, false)
}
+
// Memoized — secureStorage.read() spawns a macOS `security` subprocess (~40ms).
// bridgeApi.ts calls this from getHeaders() on every poll/heartbeat/ack.
// Cache cleared after enrollment (below) and on logout (clearAuthRelatedCaches).
//
// Only the storage read is memoized — the GrowthBook gate is checked live so
// that a gate flip after GrowthBook refresh takes effect without a restart.
//
// Zero-arg memoize: lodash keys the cache on the first argument (here always
// undefined), so there is exactly one cache entry; .cache.clear() resets it.
const readStoredToken = memoize((): string | undefined => {
  // Env var takes precedence for testing/canary.
  const envToken = process.env.CLAUDE_TRUSTED_DEVICE_TOKEN
  if (envToken) {
    return envToken
  }
  // Keychain-backed token persisted by enrollTrustedDevice().
  return getSecureStorage().read()?.trustedDeviceToken
})
+
+export function getTrustedDeviceToken(): string | undefined {
+  if (!isGateEnabled()) {
+    return undefined
+  }
+  return readStoredToken()
+}
+
+export function clearTrustedDeviceTokenCache(): void {
+  readStoredToken.cache?.clear?.()
+}
+
+/**
+ * Clear the stored trusted device token from secure storage and the memo cache.
+ * Called before enrollTrustedDevice() during /login so a stale token from the
+ * previous account isn't sent as X-Trusted-Device-Token while enrollment is
+ * in-flight (enrollTrustedDevice is async — bridge API calls between login and
+ * enrollment completion would otherwise still read the old cached token).
+ */
+export function clearTrustedDeviceToken(): void {
+  if (!isGateEnabled()) {
+    return
+  }
+  const secureStorage = getSecureStorage()
+  try {
+    const data = secureStorage.read()
+    if (data?.trustedDeviceToken) {
+      delete data.trustedDeviceToken
+      secureStorage.update(data)
+    }
+  } catch {
+    // Best-effort — don't block login if storage is inaccessible
+  }
+  readStoredToken.cache?.clear?.()
+}
+
/**
 * Enroll this device via POST /auth/trusted_devices and persist the token
 * to keychain. Best-effort — logs and returns on failure so callers
 * (post-login hooks) don't block the login flow.
 *
 * The server gates enrollment on account_session.created_at < 10min, so
 * this must be called immediately after a fresh /login. Calling it later
 * (e.g. lazy enrollment on /bridge 403) will fail with 403 stale_session.
 */
export async function enrollTrustedDevice(): Promise<void> {
  try {
    // checkGate_CACHED_OR_BLOCKING awaits any in-flight GrowthBook re-init
    // (triggered by refreshGrowthBookAfterAuthChange in login.tsx) before
    // reading the gate, so we get the post-refresh value.
    if (!(await checkGate_CACHED_OR_BLOCKING(TRUSTED_DEVICE_GATE))) {
      logForDebugging(
        `[trusted-device] Gate ${TRUSTED_DEVICE_GATE} is off, skipping enrollment`,
      )
      return
    }
    // If CLAUDE_TRUSTED_DEVICE_TOKEN is set (e.g. by an enterprise wrapper),
    // skip enrollment — the env var takes precedence in readStoredToken() so
    // any enrolled token would be shadowed and never used.
    if (process.env.CLAUDE_TRUSTED_DEVICE_TOKEN) {
      logForDebugging(
        '[trusted-device] CLAUDE_TRUSTED_DEVICE_TOKEN env var is set, skipping enrollment (env var takes precedence)',
      )
      return
    }
    // Lazy require — utils/auth.ts transitively pulls ~1300 modules
    // (config → file → permissions → sessionStorage → commands). Daemon callers
    // of getTrustedDeviceToken() don't need this; only /login does.
    /* eslint-disable @typescript-eslint/no-require-imports */
    const { getClaudeAIOAuthTokens } =
      require('../utils/auth.js') as typeof import('../utils/auth.js')
    /* eslint-enable @typescript-eslint/no-require-imports */
    const accessToken = getClaudeAIOAuthTokens()?.accessToken
    if (!accessToken) {
      logForDebugging('[trusted-device] No OAuth token, skipping enrollment')
      return
    }
    // Always re-enroll on /login — the existing token may belong to a
    // different account (account-switch without /logout). Skipping enrollment
    // would send the old account's token on the new account's bridge calls.
    const secureStorage = getSecureStorage()

    if (isEssentialTrafficOnly()) {
      logForDebugging(
        '[trusted-device] Essential traffic only, skipping enrollment',
      )
      return
    }

    const baseUrl = getOauthConfig().BASE_API_URL
    let response
    try {
      // 4xx responses resolve normally (validateStatus < 500) and are handled
      // by the status check below; only network errors and 5xx land here.
      response = await axios.post<{
        device_token?: string
        device_id?: string
      }>(
        `${baseUrl}/api/auth/trusted_devices`,
        { display_name: `Claude Code on ${hostname()} · ${process.platform}` },
        {
          headers: {
            Authorization: `Bearer ${accessToken}`,
            'Content-Type': 'application/json',
          },
          timeout: 10_000,
          validateStatus: s => s < 500,
        },
      )
    } catch (err: unknown) {
      logForDebugging(
        `[trusted-device] Enrollment request failed: ${errorMessage(err)}`,
      )
      return
    }

    if (response.status !== 200 && response.status !== 201) {
      logForDebugging(
        `[trusted-device] Enrollment failed ${response.status}: ${jsonStringify(response.data).slice(0, 200)}`,
      )
      return
    }

    const token = response.data?.device_token
    if (!token || typeof token !== 'string') {
      logForDebugging(
        '[trusted-device] Enrollment response missing device_token field',
      )
      return
    }

    try {
      const storageData = secureStorage.read()
      if (!storageData) {
        logForDebugging(
          '[trusted-device] Cannot read storage, skipping token persist',
        )
        return
      }
      storageData.trustedDeviceToken = token
      const result = secureStorage.update(storageData)
      if (!result.success) {
        logForDebugging(
          `[trusted-device] Failed to persist token: ${result.warning ?? 'unknown'}`,
        )
        return
      }
      // Invalidate the memoized read so the freshly-enrolled token is picked
      // up on the next getTrustedDeviceToken() call.
      readStoredToken.cache?.clear?.()
      logForDebugging(
        `[trusted-device] Enrolled device_id=${response.data.device_id ?? 'unknown'}`,
      )
    } catch (err: unknown) {
      logForDebugging(
        `[trusted-device] Storage write failed: ${errorMessage(err)}`,
      )
    }
  } catch (err: unknown) {
    logForDebugging(`[trusted-device] Enrollment error: ${errorMessage(err)}`)
  }
}

+ 262 - 0
src/bridge/types.ts

@@ -0,0 +1,262 @@
+/** Default per-session timeout (24 hours). */
+export const DEFAULT_SESSION_TIMEOUT_MS = 24 * 60 * 60 * 1000
+
+/** Reusable login guidance appended to bridge auth errors. */
+export const BRIDGE_LOGIN_INSTRUCTION =
+  'Remote Control is only available with claude.ai subscriptions. Please use `/login` to sign in with your claude.ai account.'
+
+/** Full error printed when `claude remote-control` is run without auth. */
+export const BRIDGE_LOGIN_ERROR =
+  'Error: You must be logged in to use Remote Control.\n\n' +
+  BRIDGE_LOGIN_INSTRUCTION
+
+/** Shown when the user disconnects Remote Control (via /remote-control or ultraplan launch). */
+export const REMOTE_CONTROL_DISCONNECTED_MSG = 'Remote Control disconnected.'
+
+// --- Protocol types for the environments API ---
+
/** One unit of work handed to this bridge: a session to run, or a healthcheck. */
export type WorkData = {
  type: 'session' | 'healthcheck'
  // Identifier of the session (or healthcheck) the work refers to.
  id: string
}

/** A work item as returned by the environments work-poll endpoint. */
export type WorkResponse = {
  id: string
  type: 'work'
  environment_id: string
  state: string
  data: WorkData
  secret: string // base64url-encoded JSON
  created_at: string // NOTE(review): assumed ISO-8601 timestamp — producer not in view
}

/** Decoded contents of WorkResponse.secret (see decodeWorkSecret; version must be 1). */
export type WorkSecret = {
  version: number
  session_ingress_token: string
  api_base_url: string
  // Where the session's source code comes from (e.g. a git checkout).
  sources: Array<{
    type: string
    git_info?: { type: string; repo: string; ref?: string; token?: string }
  }>
  // Auth tokens forwarded to the session, tagged by type.
  auth: Array<{ type: string; token: string }>
  claude_code_args?: Record<string, string> | null
  mcp_config?: unknown | null
  environment_variables?: Record<string, string> | null
  /**
   * Server-driven CCR v2 selector. Set by prepare_work_secret() when the
   * session was created via the v2 compat layer (ccr_v2_compat_enabled).
   * Same field the BYOC runner reads at environment-runner/sessionExecutor.ts.
   */
  use_code_sessions?: boolean
}

/** Terminal status a bridged session can end with. */
export type SessionDoneStatus = 'completed' | 'failed' | 'interrupted'

/** Kinds of session events surfaced in status displays. */
export type SessionActivityType = 'tool_start' | 'text' | 'result' | 'error'

/** A recent session event plus a human-readable summary for status lines. */
export type SessionActivity = {
  type: SessionActivityType
  summary: string // e.g. "Editing src/foo.ts", "Reading package.json"
  timestamp: number // NOTE(review): assumed epoch millis — confirm at producer
}
+
+/**
+ * How `claude remote-control` chooses session working directories.
+ * - `single-session`: one session in cwd, bridge tears down when it ends
+ * - `worktree`: persistent server, every session gets an isolated git worktree
+ * - `same-dir`: persistent server, every session shares cwd (can stomp each other)
+ */
+export type SpawnMode = 'single-session' | 'worktree' | 'same-dir'
+
+/**
+ * Well-known worker_type values THIS codebase produces. Sent as
+ * `metadata.worker_type` at environment registration so claude.ai can filter
+ * the session picker by origin (e.g. assistant tab only shows assistant
+ * workers). The backend treats this as an opaque string — desktop cowork
+ * sends `"cowork"`, which isn't in this union. REPL code uses this narrow
+ * type for its own exhaustiveness; wire-level fields accept any string.
+ */
+export type BridgeWorkerType = 'claude_code' | 'claude_code_assistant'
+
+export type BridgeConfig = {
+  dir: string
+  machineName: string
+  branch: string
+  gitRepoUrl: string | null
+  maxSessions: number
+  spawnMode: SpawnMode
+  verbose: boolean
+  sandbox: boolean
+  /** Client-generated UUID identifying this bridge instance. */
+  bridgeId: string
+  /**
+   * Sent as metadata.worker_type so web clients can filter by origin.
+   * Backend treats this as opaque — any string, not just BridgeWorkerType.
+   */
+  workerType: string
+  /** Client-generated UUID for idempotent environment registration. */
+  environmentId: string
+  /**
+   * Backend-issued environment_id to reuse on re-register. When set, the
+   * backend treats registration as a reconnect to the existing environment
+   * instead of creating a new one. Used by `claude remote-control
+   * --session-id` resume. Must be a backend-format ID — client UUIDs are
+   * rejected with 400.
+   */
+  reuseEnvironmentId?: string
+  /** API base URL the bridge is connected to (used for polling). */
+  apiBaseUrl: string
+  /** Session ingress base URL for WebSocket connections (may differ from apiBaseUrl locally). */
+  sessionIngressUrl: string
+  /** Debug file path passed via --debug-file. */
+  debugFile?: string
+  /** Per-session timeout in milliseconds. Sessions exceeding this are killed. */
+  sessionTimeoutMs?: number
+}
+
+// --- Dependency interfaces (for testability) ---
+
+/**
+ * A control_response event sent back to a session (e.g. a permission decision).
+ * The `subtype` is `'success'` per the SDK protocol; the inner `response`
+ * carries the permission decision payload (e.g. `{ behavior: 'allow' }`).
+ */
+export type PermissionResponseEvent = {
+  type: 'control_response'
+  response: {
+    subtype: 'success'
+    request_id: string
+    response: Record<string, unknown>
+  }
+}
+
+export type BridgeApiClient = {
+  registerBridgeEnvironment(config: BridgeConfig): Promise<{
+    environment_id: string
+    environment_secret: string
+  }>
+  pollForWork(
+    environmentId: string,
+    environmentSecret: string,
+    signal?: AbortSignal,
+    reclaimOlderThanMs?: number,
+  ): Promise<WorkResponse | null>
+  acknowledgeWork(
+    environmentId: string,
+    workId: string,
+    sessionToken: string,
+  ): Promise<void>
+  /** Stop a work item via the environments API. */
+  stopWork(environmentId: string, workId: string, force: boolean): Promise<void>
+  /** Deregister/delete the bridge environment on graceful shutdown. */
+  deregisterEnvironment(environmentId: string): Promise<void>
+  /** Send a permission response (control_response) to a session via the session events API. */
+  sendPermissionResponseEvent(
+    sessionId: string,
+    event: PermissionResponseEvent,
+    sessionToken: string,
+  ): Promise<void>
+  /** Archive a session so it no longer appears as active on the server. */
+  archiveSession(sessionId: string): Promise<void>
+  /**
+   * Force-stop stale worker instances and re-queue a session on an environment.
+   * Used by `--session-id` to resume a session after the original bridge died.
+   */
+  reconnectSession(environmentId: string, sessionId: string): Promise<void>
+  /**
+   * Send a lightweight heartbeat for an active work item, extending its lease.
+   * Uses SessionIngressAuth (JWT, no DB hit) instead of EnvironmentSecretAuth.
+   * Returns the server's response with lease status.
+   */
+  heartbeatWork(
+    environmentId: string,
+    workId: string,
+    sessionToken: string,
+  ): Promise<{ lease_extended: boolean; state: string }>
+}
+
+export type SessionHandle = {
+  sessionId: string
+  done: Promise<SessionDoneStatus>
+  kill(): void
+  forceKill(): void
+  activities: SessionActivity[] // ring buffer of recent activities (last ~10)
+  currentActivity: SessionActivity | null // most recent
+  accessToken: string // session_ingress_token for API calls
+  lastStderr: string[] // ring buffer of last stderr lines
+  writeStdin(data: string): void // write directly to child stdin
+  /** Update the access token for a running session (e.g. after token refresh). */
+  updateAccessToken(token: string): void
+}
+
+export type SessionSpawnOpts = {
+  sessionId: string
+  sdkUrl: string
+  accessToken: string
+  /** When true, spawn the child with CCR v2 env vars (SSE transport + CCRClient). */
+  useCcrV2?: boolean
+  /** Required when useCcrV2 is true. Obtained from POST /worker/register. */
+  workerEpoch?: number
+  /**
+   * Fires once with the text of the first real user message seen on the
+   * child's stdout (via --replay-user-messages). Lets the caller derive a
+   * session title when none exists yet. Tool-result and synthetic user
+   * messages are skipped.
+   */
+  onFirstUserMessage?: (text: string) => void
+}
+
+export type SessionSpawner = {
+  spawn(opts: SessionSpawnOpts, dir: string): SessionHandle
+}
+
+export type BridgeLogger = {
+  printBanner(config: BridgeConfig, environmentId: string): void
+  logSessionStart(sessionId: string, prompt: string): void
+  logSessionComplete(sessionId: string, durationMs: number): void
+  logSessionFailed(sessionId: string, error: string): void
+  logStatus(message: string): void
+  logVerbose(message: string): void
+  logError(message: string): void
+  /** Log a reconnection success event after recovering from connection errors. */
+  logReconnected(disconnectedMs: number): void
+  /** Show idle status with repo/branch info and shimmer animation. */
+  updateIdleStatus(): void
+  /** Show reconnecting status in the live display. */
+  updateReconnectingStatus(delayStr: string, elapsedStr: string): void
+  updateSessionStatus(
+    sessionId: string,
+    elapsed: string,
+    activity: SessionActivity,
+    trail: string[],
+  ): void
+  clearStatus(): void
+  /** Set repository info for status line display. */
+  setRepoInfo(repoName: string, branch: string): void
+  /** Set debug log glob shown above the status line (ant users). */
+  setDebugLogPath(path: string): void
+  /** Transition to "Attached" state when a session starts. */
+  setAttached(sessionId: string): void
+  /** Show failed status in the live display. */
+  updateFailedStatus(error: string): void
+  /** Toggle QR code visibility. */
+  toggleQr(): void
+  /** Update the "<n> of <m> sessions" indicator and spawn mode hint. */
+  updateSessionCount(active: number, max: number, mode: SpawnMode): void
+  /** Update the spawn mode shown in the session-count line. Pass null to hide (single-session or toggle unavailable). */
+  setSpawnModeDisplay(mode: 'same-dir' | 'worktree' | null): void
+  /** Register a new session for multi-session display (called after spawn succeeds). */
+  addSession(sessionId: string, url: string): void
+  /** Update the per-session activity summary (tool being run) in the multi-session list. */
+  updateSessionActivity(sessionId: string, activity: SessionActivity): void
+  /**
+   * Set a session's display title. In multi-session mode, updates the bullet list
+   * entry. In single-session mode, also shows the title in the main status line.
+   * Triggers a render (guarded against reconnecting/failed states).
+   */
+  setSessionTitle(sessionId: string, title: string): void
+  /** Remove a session from the multi-session display when it ends. */
+  removeSession(sessionId: string): void
+  /** Force a re-render of the status display (for multi-session activity refresh). */
+  refreshDisplay(): void
+}

+ 127 - 0
src/bridge/workSecret.ts

@@ -0,0 +1,127 @@
+import axios from 'axios'
+import { jsonParse, jsonStringify } from '../utils/slowOperations.js'
+import type { WorkSecret } from './types.js'
+
+/** Decode a base64url-encoded work secret and validate its version. */
+export function decodeWorkSecret(secret: string): WorkSecret {
+  const json = Buffer.from(secret, 'base64url').toString('utf-8')
+  const parsed: unknown = jsonParse(json)
+  if (
+    !parsed ||
+    typeof parsed !== 'object' ||
+    !('version' in parsed) ||
+    parsed.version !== 1
+  ) {
+    throw new Error(
+      `Unsupported work secret version: ${parsed && typeof parsed === 'object' && 'version' in parsed ? parsed.version : 'unknown'}`,
+    )
+  }
+  const obj = parsed as Record<string, unknown>
+  if (
+    typeof obj.session_ingress_token !== 'string' ||
+    obj.session_ingress_token.length === 0
+  ) {
+    throw new Error(
+      'Invalid work secret: missing or empty session_ingress_token',
+    )
+  }
+  if (typeof obj.api_base_url !== 'string') {
+    throw new Error('Invalid work secret: missing api_base_url')
+  }
+  return parsed as WorkSecret
+}
+
+/**
+ * Build a WebSocket SDK URL from the API base URL and session ID.
+ * Strips the HTTP(S) protocol and constructs a ws(s):// ingress URL.
+ *
+ * Uses /v2/ for localhost (direct to session-ingress, no Envoy rewrite)
+ * and /v1/ for production (Envoy rewrites /v1/ → /v2/).
+ */
+export function buildSdkUrl(apiBaseUrl: string, sessionId: string): string {
+  const isLocalhost =
+    apiBaseUrl.includes('localhost') || apiBaseUrl.includes('127.0.0.1')
+  const protocol = isLocalhost ? 'ws' : 'wss'
+  const version = isLocalhost ? 'v2' : 'v1'
+  const host = apiBaseUrl.replace(/^https?:\/\//, '').replace(/\/+$/, '')
+  return `${protocol}://${host}/${version}/session_ingress/ws/${sessionId}`
+}
+
+/**
+ * Compare two session IDs regardless of their tagged-ID prefix.
+ *
+ * Tagged IDs have the form {tag}_{body} or {tag}_staging_{body}, where the
+ * body encodes a UUID. CCR v2's compat layer returns `session_*` to v1 API
+ * clients (compat/convert.go:41) but the infrastructure layer (sandbox-gateway
+ * work queue, work poll response) uses `cse_*` (compat/CLAUDE.md:13). Both
+ * have the same underlying UUID.
+ *
+ * Without this, replBridge rejects its own session as "foreign" at the
+ * work-received check when the ccr_v2_compat_enabled gate is on.
+ */
+export function sameSessionId(a: string, b: string): boolean {
+  if (a === b) return true
+  // The body is everything after the last underscore — this handles both
+  // `{tag}_{body}` and `{tag}_staging_{body}`.
+  const aBody = a.slice(a.lastIndexOf('_') + 1)
+  const bBody = b.slice(b.lastIndexOf('_') + 1)
+  // Guard against IDs with no underscore (bare UUIDs): lastIndexOf returns -1,
+  // slice(0) returns the whole string, and we already checked a === b above.
+  // Require a minimum length to avoid accidental matches on short suffixes
+  // (e.g. single-char tag remnants from malformed IDs).
+  return aBody.length >= 4 && aBody === bBody
+}
+
+/**
+ * Build a CCR v2 session URL from the API base URL and session ID.
+ * Unlike buildSdkUrl, this returns an HTTP(S) URL (not ws://) and points at
+ * /v1/code/sessions/{id} — the child CC will derive the SSE stream path
+ * and worker endpoints from this base.
+ */
+export function buildCCRv2SdkUrl(
+  apiBaseUrl: string,
+  sessionId: string,
+): string {
+  const base = apiBaseUrl.replace(/\/+$/, '')
+  return `${base}/v1/code/sessions/${sessionId}`
+}
+
+/**
+ * Register this bridge as the worker for a CCR v2 session.
+ * Returns the worker_epoch, which must be passed to the child CC process
+ * so its CCRClient can include it in every heartbeat/state/event request.
+ *
+ * Mirrors what environment-manager does in the container path
+ * (api-go/environment-manager/cmd/cmd_task_run.go RegisterWorker).
+ */
+export async function registerWorker(
+  sessionUrl: string,
+  accessToken: string,
+): Promise<number> {
+  const response = await axios.post(
+    `${sessionUrl}/worker/register`,
+    {},
+    {
+      headers: {
+        Authorization: `Bearer ${accessToken}`,
+        'Content-Type': 'application/json',
+        'anthropic-version': '2023-06-01',
+      },
+      timeout: 10_000,
+    },
+  )
+  // protojson serializes int64 as a string to avoid JS number precision loss;
+  // the Go side may also return a number depending on encoder settings.
+  const raw = response.data?.worker_epoch
+  const epoch = typeof raw === 'string' ? Number(raw) : raw
+  if (
+    typeof epoch !== 'number' ||
+    !Number.isFinite(epoch) ||
+    !Number.isSafeInteger(epoch)
+  ) {
+    throw new Error(
+      `registerWorker: invalid worker_epoch in response: ${jsonStringify(response.data)}`,
+    )
+  }
+  return epoch
+}

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 370 - 0
src/buddy/CompanionSprite.tsx


+ 133 - 0
src/buddy/companion.ts

@@ -0,0 +1,133 @@
+import { getGlobalConfig } from '../utils/config.js'
+import {
+  type Companion,
+  type CompanionBones,
+  EYES,
+  HATS,
+  RARITIES,
+  RARITY_WEIGHTS,
+  type Rarity,
+  SPECIES,
+  STAT_NAMES,
+  type StatName,
+} from './types.js'
+
+// Mulberry32 — tiny seeded PRNG, good enough for picking ducks
+function mulberry32(seed: number): () => number {
+  let a = seed >>> 0
+  return function () {
+    a |= 0
+    a = (a + 0x6d2b79f5) | 0
+    let t = Math.imul(a ^ (a >>> 15), 1 | a)
+    t = (t + Math.imul(t ^ (t >>> 7), 61 | t)) ^ t
+    return ((t ^ (t >>> 14)) >>> 0) / 4294967296
+  }
+}
+
+function hashString(s: string): number {
+  if (typeof Bun !== 'undefined') {
+    return Number(BigInt(Bun.hash(s)) & 0xffffffffn)
+  }
+  let h = 2166136261
+  for (let i = 0; i < s.length; i++) {
+    h ^= s.charCodeAt(i)
+    h = Math.imul(h, 16777619)
+  }
+  return h >>> 0
+}
+
+function pick<T>(rng: () => number, arr: readonly T[]): T {
+  return arr[Math.floor(rng() * arr.length)]!
+}
+
+function rollRarity(rng: () => number): Rarity {
+  const total = Object.values(RARITY_WEIGHTS).reduce((a, b) => a + b, 0)
+  let roll = rng() * total
+  for (const rarity of RARITIES) {
+    roll -= RARITY_WEIGHTS[rarity]
+    if (roll < 0) return rarity
+  }
+  return 'common'
+}
+
// Base stat floor per rarity — rarer companions roll from a higher baseline.
// Consumed by rollStats below.
const RARITY_FLOOR: Record<Rarity, number> = {
  common: 5,
  uncommon: 15,
  rare: 25,
  epic: 35,
  legendary: 50,
}
+
+// One peak stat, one dump stat, rest scattered. Rarity bumps the floor.
+function rollStats(
+  rng: () => number,
+  rarity: Rarity,
+): Record<StatName, number> {
+  const floor = RARITY_FLOOR[rarity]
+  const peak = pick(rng, STAT_NAMES)
+  let dump = pick(rng, STAT_NAMES)
+  while (dump === peak) dump = pick(rng, STAT_NAMES)
+
+  const stats = {} as Record<StatName, number>
+  for (const name of STAT_NAMES) {
+    if (name === peak) {
+      stats[name] = Math.min(100, floor + 50 + Math.floor(rng() * 30))
+    } else if (name === dump) {
+      stats[name] = Math.max(1, floor - 10 + Math.floor(rng() * 15))
+    } else {
+      stats[name] = floor + Math.floor(rng() * 40)
+    }
+  }
+  return stats
+}
+
// Salt mixed into the userId before hashing (see roll below) — bumping this
// string deterministically rerolls every user's companion.
const SALT = 'friend-2026-401'

/** Result of one deterministic companion roll. */
export type Roll = {
  bones: CompanionBones
  // Extra output drawn from the same PRNG stream for downstream deterministic
  // choices (name suggests inspiration/flavor text — producer decides).
  inspirationSeed: number
}
+
+function rollFrom(rng: () => number): Roll {
+  const rarity = rollRarity(rng)
+  const bones: CompanionBones = {
+    rarity,
+    species: pick(rng, SPECIES),
+    eye: pick(rng, EYES),
+    hat: rarity === 'common' ? 'none' : pick(rng, HATS),
+    shiny: rng() < 0.01,
+    stats: rollStats(rng, rarity),
+  }
+  return { bones, inspirationSeed: Math.floor(rng() * 1e9) }
+}
+
+// Called from three hot paths (500ms sprite tick, per-keystroke PromptInput,
+// per-turn observer) with the same userId → cache the deterministic result.
+let rollCache: { key: string; value: Roll } | undefined
+export function roll(userId: string): Roll {
+  const key = userId + SALT
+  if (rollCache?.key === key) return rollCache.value
+  const value = rollFrom(mulberry32(hashString(key)))
+  rollCache = { key, value }
+  return value
+}
+
/** Roll directly from an arbitrary seed string — no SALT, no cache (cf. roll). */
export function rollWithSeed(seed: string): Roll {
  return rollFrom(mulberry32(hashString(seed)))
}
+
/**
 * Stable identity used to seed the companion roll: OAuth account UUID when
 * logged in, else the anonymous userID, else 'anon'.
 */
export function companionUserId(): string {
  const config = getGlobalConfig()
  return config.oauthAccount?.accountUuid ?? config.userID ?? 'anon'
}
+
+// Regenerate bones from userId, merge with stored soul. Bones never persist
+// so species renames and SPECIES-array edits can't break stored companions,
+// and editing config.companion can't fake a rarity.
+export function getCompanion(): Companion | undefined {
+  const stored = getGlobalConfig().companion
+  if (!stored) return undefined
+  const { bones } = roll(companionUserId())
+  // bones last so stale bones fields in old-format configs get overridden
+  return { ...stored, ...bones }
+}

+ 36 - 0
src/buddy/prompt.ts

@@ -0,0 +1,36 @@
+import { feature } from 'bun:bundle'
+import type { Message } from '../types/message.js'
+import type { Attachment } from '../utils/attachments.js'
+import { getGlobalConfig } from '../utils/config.js'
+import { getCompanion } from './companion.js'
+
/**
 * System-prompt text introducing the on-screen companion to the model:
 * the companion is a separate watcher, and when the user addresses it by
 * name the model should stay terse and let the bubble answer.
 */
export function companionIntroText(name: string, species: string): string {
  return `# Companion

A small ${species} named ${name} sits beside the user's input box and occasionally comments in a speech bubble. You're not ${name} — it's a separate watcher.

When the user addresses ${name} directly (by name), its bubble will answer. Your job in that moment is to stay out of the way: respond in ONE line or less, or just answer any part of the message meant for you. Don't explain that you're not ${name} — they know. Don't narrate what ${name} might say — the bubble handles that.`
}
+
+export function getCompanionIntroAttachment(
+  messages: Message[] | undefined,
+): Attachment[] {
+  if (!feature('BUDDY')) return []
+  const companion = getCompanion()
+  if (!companion || getGlobalConfig().companionMuted) return []
+
+  // Skip if already announced for this companion.
+  for (const msg of messages ?? []) {
+    if (msg.type !== 'attachment') continue
+    if (msg.attachment.type !== 'companion_intro') continue
+    if (msg.attachment.name === companion.name) return []
+  }
+
+  return [
+    {
+      type: 'companion_intro',
+      name: companion.name,
+      species: companion.species,
+    },
+  ]
+}

+ 514 - 0
src/buddy/sprites.ts

@@ -0,0 +1,514 @@
+import type { CompanionBones, Eye, Hat, Species } from './types.js'
+import {
+  axolotl,
+  blob,
+  cactus,
+  capybara,
+  cat,
+  chonk,
+  dragon,
+  duck,
+  ghost,
+  goose,
+  mushroom,
+  octopus,
+  owl,
+  penguin,
+  rabbit,
+  robot,
+  snail,
+  turtle,
+} from './types.js'
+
+// Each sprite is 5 lines tall, 12 wide (after {E}→1char substitution).
+// Multiple frames per species for idle fidget animation.
+// Line 0 is the hat slot — must be blank in frames 0-1; frame 2 may use it.
+const BODIES: Record<Species, string[][]> = {
+  [duck]: [
+    [
+      '            ',
+      '    __      ',
+      '  <({E} )___  ',
+      '   (  ._>   ',
+      '    `--´    ',
+    ],
+    [
+      '            ',
+      '    __      ',
+      '  <({E} )___  ',
+      '   (  ._>   ',
+      '    `--´~   ',
+    ],
+    [
+      '            ',
+      '    __      ',
+      '  <({E} )___  ',
+      '   (  .__>  ',
+      '    `--´    ',
+    ],
+  ],
+  [goose]: [
+    [
+      '            ',
+      '     ({E}>    ',
+      '     ||     ',
+      '   _(__)_   ',
+      '    ^^^^    ',
+    ],
+    [
+      '            ',
+      '    ({E}>     ',
+      '     ||     ',
+      '   _(__)_   ',
+      '    ^^^^    ',
+    ],
+    [
+      '            ',
+      '     ({E}>>   ',
+      '     ||     ',
+      '   _(__)_   ',
+      '    ^^^^    ',
+    ],
+  ],
+  [blob]: [
+    [
+      '            ',
+      '   .----.   ',
+      '  ( {E}  {E} )  ',
+      '  (      )  ',
+      '   `----´   ',
+    ],
+    [
+      '            ',
+      '  .------.  ',
+      ' (  {E}  {E}  ) ',
+      ' (        ) ',
+      '  `------´  ',
+    ],
+    [
+      '            ',
+      '    .--.    ',
+      '   ({E}  {E})   ',
+      '   (    )   ',
+      '    `--´    ',
+    ],
+  ],
+  [cat]: [
+    [
+      '            ',
+      '   /\\_/\\    ',
+      '  ( {E}   {E})  ',
+      '  (  ω  )   ',
+      '  (")_(")   ',
+    ],
+    [
+      '            ',
+      '   /\\_/\\    ',
+      '  ( {E}   {E})  ',
+      '  (  ω  )   ',
+      '  (")_(")~  ',
+    ],
+    [
+      '            ',
+      '   /\\-/\\    ',
+      '  ( {E}   {E})  ',
+      '  (  ω  )   ',
+      '  (")_(")   ',
+    ],
+  ],
+  [dragon]: [
+    [
+      '            ',
+      '  /^\\  /^\\  ',
+      ' <  {E}  {E}  > ',
+      ' (   ~~   ) ',
+      '  `-vvvv-´  ',
+    ],
+    [
+      '            ',
+      '  /^\\  /^\\  ',
+      ' <  {E}  {E}  > ',
+      ' (        ) ',
+      '  `-vvvv-´  ',
+    ],
+    [
+      '   ~    ~   ',
+      '  /^\\  /^\\  ',
+      ' <  {E}  {E}  > ',
+      ' (   ~~   ) ',
+      '  `-vvvv-´  ',
+    ],
+  ],
+  [octopus]: [
+    [
+      '            ',
+      '   .----.   ',
+      '  ( {E}  {E} )  ',
+      '  (______)  ',
+      '  /\\/\\/\\/\\  ',
+    ],
+    [
+      '            ',
+      '   .----.   ',
+      '  ( {E}  {E} )  ',
+      '  (______)  ',
+      '  \\/\\/\\/\\/  ',
+    ],
+    [
+      '     o      ',
+      '   .----.   ',
+      '  ( {E}  {E} )  ',
+      '  (______)  ',
+      '  /\\/\\/\\/\\  ',
+    ],
+  ],
+  [owl]: [
+    [
+      '            ',
+      '   /\\  /\\   ',
+      '  (({E})({E}))  ',
+      '  (  ><  )  ',
+      '   `----´   ',
+    ],
+    [
+      '            ',
+      '   /\\  /\\   ',
+      '  (({E})({E}))  ',
+      '  (  ><  )  ',
+      '   .----.   ',
+    ],
+    [
+      '            ',
+      '   /\\  /\\   ',
+      '  (({E})(-))  ',
+      '  (  ><  )  ',
+      '   `----´   ',
+    ],
+  ],
+  [penguin]: [
+    [
+      '            ',
+      '  .---.     ',
+      '  ({E}>{E})     ',
+      ' /(   )\\    ',
+      '  `---´     ',
+    ],
+    [
+      '            ',
+      '  .---.     ',
+      '  ({E}>{E})     ',
+      ' |(   )|    ',
+      '  `---´     ',
+    ],
+    [
+      '  .---.     ',
+      '  ({E}>{E})     ',
+      ' /(   )\\    ',
+      '  `---´     ',
+      '   ~ ~      ',
+    ],
+  ],
+  [turtle]: [
+    [
+      '            ',
+      '   _,--._   ',
+      '  ( {E}  {E} )  ',
+      ' /[______]\\ ',
+      '  ``    ``  ',
+    ],
+    [
+      '            ',
+      '   _,--._   ',
+      '  ( {E}  {E} )  ',
+      ' /[______]\\ ',
+      '   ``  ``   ',
+    ],
+    [
+      '            ',
+      '   _,--._   ',
+      '  ( {E}  {E} )  ',
+      ' /[======]\\ ',
+      '  ``    ``  ',
+    ],
+  ],
+  [snail]: [
+    [
+      '            ',
+      ' {E}    .--.  ',
+      '  \\  ( @ )  ',
+      '   \\_`--´   ',
+      '  ~~~~~~~   ',
+    ],
+    [
+      '            ',
+      '  {E}   .--.  ',
+      '  |  ( @ )  ',
+      '   \\_`--´   ',
+      '  ~~~~~~~   ',
+    ],
+    [
+      '            ',
+      ' {E}    .--.  ',
+      '  \\  ( @  ) ',
+      '   \\_`--´   ',
+      '   ~~~~~~   ',
+    ],
+  ],
+  [ghost]: [
+    [
+      '            ',
+      '   .----.   ',
+      '  / {E}  {E} \\  ',
+      '  |      |  ',
+      '  ~`~``~`~  ',
+    ],
+    [
+      '            ',
+      '   .----.   ',
+      '  / {E}  {E} \\  ',
+      '  |      |  ',
+      '  `~`~~`~`  ',
+    ],
+    [
+      '    ~  ~    ',
+      '   .----.   ',
+      '  / {E}  {E} \\  ',
+      '  |      |  ',
+      '  ~~`~~`~~  ',
+    ],
+  ],
+  [axolotl]: [
+    [
+      '            ',
+      '}~(______)~{',
+      '}~({E} .. {E})~{',
+      '  ( .--. )  ',
+      '  (_/  \\_)  ',
+    ],
+    [
+      '            ',
+      '~}(______){~',
+      '~}({E} .. {E}){~',
+      '  ( .--. )  ',
+      '  (_/  \\_)  ',
+    ],
+    [
+      '            ',
+      '}~(______)~{',
+      '}~({E} .. {E})~{',
+      '  (  --  )  ',
+      '  ~_/  \\_~  ',
+    ],
+  ],
+  [capybara]: [
+    [
+      '            ',
+      '  n______n  ',
+      ' ( {E}    {E} ) ',
+      ' (   oo   ) ',
+      '  `------´  ',
+    ],
+    [
+      '            ',
+      '  n______n  ',
+      ' ( {E}    {E} ) ',
+      ' (   Oo   ) ',
+      '  `------´  ',
+    ],
+    [
+      '    ~  ~    ',
+      '  u______n  ',
+      ' ( {E}    {E} ) ',
+      ' (   oo   ) ',
+      '  `------´  ',
+    ],
+  ],
+  [cactus]: [
+    [
+      '            ',
+      ' n  ____  n ',
+      ' | |{E}  {E}| | ',
+      ' |_|    |_| ',
+      '   |    |   ',
+    ],
+    [
+      '            ',
+      '    ____    ',
+      ' n |{E}  {E}| n ',
+      ' |_|    |_| ',
+      '   |    |   ',
+    ],
+    [
+      ' n        n ',
+      ' |  ____  | ',
+      ' | |{E}  {E}| | ',
+      ' |_|    |_| ',
+      '   |    |   ',
+    ],
+  ],
+  [robot]: [
+    [
+      '            ',
+      '   .[||].   ',
+      '  [ {E}  {E} ]  ',
+      '  [ ==== ]  ',
+      '  `------´  ',
+    ],
+    [
+      '            ',
+      '   .[||].   ',
+      '  [ {E}  {E} ]  ',
+      '  [ -==- ]  ',
+      '  `------´  ',
+    ],
+    [
+      '     *      ',
+      '   .[||].   ',
+      '  [ {E}  {E} ]  ',
+      '  [ ==== ]  ',
+      '  `------´  ',
+    ],
+  ],
+  [rabbit]: [
+    [
+      '            ',
+      '   (\\__/)   ',
+      '  ( {E}  {E} )  ',
+      ' =(  ..  )= ',
+      '  (")__(")  ',
+    ],
+    [
+      '            ',
+      '   (|__/)   ',
+      '  ( {E}  {E} )  ',
+      ' =(  ..  )= ',
+      '  (")__(")  ',
+    ],
+    [
+      '            ',
+      '   (\\__/)   ',
+      '  ( {E}  {E} )  ',
+      ' =( .  . )= ',
+      '  (")__(")  ',
+    ],
+  ],
+  [mushroom]: [
+    [
+      '            ',
+      ' .-o-OO-o-. ',
+      '(__________)',
+      '   |{E}  {E}|   ',
+      '   |____|   ',
+    ],
+    [
+      '            ',
+      ' .-O-oo-O-. ',
+      '(__________)',
+      '   |{E}  {E}|   ',
+      '   |____|   ',
+    ],
+    [
+      '   . o  .   ',
+      ' .-o-OO-o-. ',
+      '(__________)',
+      '   |{E}  {E}|   ',
+      '   |____|   ',
+    ],
+  ],
+  [chonk]: [
+    [
+      '            ',
+      '  /\\    /\\  ',
+      ' ( {E}    {E} ) ',
+      ' (   ..   ) ',
+      '  `------´  ',
+    ],
+    [
+      '            ',
+      '  /\\    /|  ',
+      ' ( {E}    {E} ) ',
+      ' (   ..   ) ',
+      '  `------´  ',
+    ],
+    [
+      '            ',
+      '  /\\    /\\  ',
+      ' ( {E}    {E} ) ',
+      ' (   ..   ) ',
+      '  `------´~ ',
+    ],
+  ],
+}
+
// One 12-char row per hat, rendered into the sprite's hat slot (line 0) by
// renderSprite. Widths match the 12-wide sprite bodies so art stays aligned.
const HAT_LINES: Record<Hat, string> = {
  none: '',
  crown: '   \\^^^/    ',
  tophat: '   [___]    ',
  propeller: '    -+-     ',
  halo: '   (   )    ',
  wizard: '    /^\\     ',
  beanie: '   (___)    ',
  tinyduck: '    ,>      ',
}
+
+export function renderSprite(bones: CompanionBones, frame = 0): string[] {
+  const frames = BODIES[bones.species]
+  const body = frames[frame % frames.length]!.map(line =>
+    line.replaceAll('{E}', bones.eye),
+  )
+  const lines = [...body]
+  // Only replace with hat if line 0 is empty (some fidget frames use it for smoke etc)
+  if (bones.hat !== 'none' && !lines[0]!.trim()) {
+    lines[0] = HAT_LINES[bones.hat]
+  }
+  // Drop blank hat slot — wastes a row in the Card and ambient sprite when
+  // there's no hat and the frame isn't using it for smoke/antenna/etc.
+  // Only safe when ALL frames have blank line 0; otherwise heights oscillate.
+  if (!lines[0]!.trim() && frames.every(f => !f[0]!.trim())) lines.shift()
+  return lines
+}
+
+export function spriteFrameCount(species: Species): number {
+  return BODIES[species].length
+}
+
/**
 * Compact one-line "face" for a companion — the eye glyph substituted into a
 * per-species template. No default case: the switch is exhaustive over
 * Species (the species constants carry literal types, so TS narrows fully).
 */
export function renderFace(bones: CompanionBones): string {
  const eye: Eye = bones.eye
  switch (bones.species) {
    case duck:
    case goose:
      return `(${eye}>`
    case blob:
      return `(${eye}${eye})`
    case cat:
      return `=${eye}ω${eye}=`
    case dragon:
      return `<${eye}~${eye}>`
    case octopus:
      return `~(${eye}${eye})~`
    case owl:
      return `(${eye})(${eye})`
    case penguin:
      return `(${eye}>)`
    case turtle:
      return `[${eye}_${eye}]`
    case snail:
      return `${eye}(@)`
    case ghost:
      return `/${eye}${eye}\\`
    case axolotl:
      return `}${eye}.${eye}{`
    case capybara:
      return `(${eye}oo${eye})`
    case cactus:
      return `|${eye}  ${eye}|`
    case robot:
      return `[${eye}${eye}]`
    case rabbit:
      return `(${eye}..${eye})`
    case mushroom:
      return `|${eye}  ${eye}|`
    case chonk:
      return `(${eye}.${eye})`
  }
}

+ 148 - 0
src/buddy/types.ts

@@ -0,0 +1,148 @@
+// Rarity tiers, ordered most → least common (draw weights in RARITY_WEIGHTS).
+export const RARITIES = [
+  'common',
+  'uncommon',
+  'rare',
+  'epic',
+  'legendary',
+] as const
+export type Rarity = (typeof RARITIES)[number]
+
+// One species name collides with a model-codename canary in excluded-strings.txt.
+// The check greps build output (not source), so runtime-constructing the value keeps
+// the literal out of the bundle while the check stays armed for the actual codename.
+// All species encoded uniformly; `as` casts are type-position only (erased pre-bundle).
+const c = String.fromCharCode
+// biome-ignore format: keep the species list compact
+
+export const duck = c(0x64,0x75,0x63,0x6b) as 'duck'
+export const goose = c(0x67, 0x6f, 0x6f, 0x73, 0x65) as 'goose'
+export const blob = c(0x62, 0x6c, 0x6f, 0x62) as 'blob'
+export const cat = c(0x63, 0x61, 0x74) as 'cat'
+export const dragon = c(0x64, 0x72, 0x61, 0x67, 0x6f, 0x6e) as 'dragon'
+export const octopus = c(0x6f, 0x63, 0x74, 0x6f, 0x70, 0x75, 0x73) as 'octopus'
+export const owl = c(0x6f, 0x77, 0x6c) as 'owl'
+export const penguin = c(0x70, 0x65, 0x6e, 0x67, 0x75, 0x69, 0x6e) as 'penguin'
+export const turtle = c(0x74, 0x75, 0x72, 0x74, 0x6c, 0x65) as 'turtle'
+export const snail = c(0x73, 0x6e, 0x61, 0x69, 0x6c) as 'snail'
+export const ghost = c(0x67, 0x68, 0x6f, 0x73, 0x74) as 'ghost'
+export const axolotl = c(0x61, 0x78, 0x6f, 0x6c, 0x6f, 0x74, 0x6c) as 'axolotl'
+export const capybara = c(
+  0x63,
+  0x61,
+  0x70,
+  0x79,
+  0x62,
+  0x61,
+  0x72,
+  0x61,
+) as 'capybara'
+export const cactus = c(0x63, 0x61, 0x63, 0x74, 0x75, 0x73) as 'cactus'
+export const robot = c(0x72, 0x6f, 0x62, 0x6f, 0x74) as 'robot'
+export const rabbit = c(0x72, 0x61, 0x62, 0x62, 0x69, 0x74) as 'rabbit'
+export const mushroom = c(
+  0x6d,
+  0x75,
+  0x73,
+  0x68,
+  0x72,
+  0x6f,
+  0x6f,
+  0x6d,
+) as 'mushroom'
+export const chonk = c(0x63, 0x68, 0x6f, 0x6e, 0x6b) as 'chonk'
+
+// All species, in declaration order; the Species union is derived from this tuple.
+export const SPECIES = [
+  duck,
+  goose,
+  blob,
+  cat,
+  dragon,
+  octopus,
+  owl,
+  penguin,
+  turtle,
+  snail,
+  ghost,
+  axolotl,
+  capybara,
+  cactus,
+  robot,
+  rabbit,
+  mushroom,
+  chonk,
+] as const
+export type Species = (typeof SPECIES)[number] // biome-ignore format: keep compact
+
+// Eye glyphs — substituted into sprite frames via the {E} placeholder.
+export const EYES = ['·', '✦', '×', '◉', '@', '°'] as const
+export type Eye = (typeof EYES)[number]
+
+// Hat variants; 'none' means the sprite's hat slot stays empty.
+export const HATS = [
+  'none',
+  'crown',
+  'tophat',
+  'propeller',
+  'halo',
+  'wizard',
+  'beanie',
+  'tinyduck',
+] as const
+export type Hat = (typeof HATS)[number]
+
+// Flavor stat labels for the companion (numeric values live in CompanionBones.stats).
+export const STAT_NAMES = [
+  'DEBUGGING',
+  'PATIENCE',
+  'CHAOS',
+  'WISDOM',
+  'SNARK',
+] as const
+export type StatName = (typeof STAT_NAMES)[number]
+
+// Deterministic parts — derived from hash(userId)
+export type CompanionBones = {
+  rarity: Rarity
+  species: Species
+  eye: Eye
+  hat: Hat
+  shiny: boolean
+  stats: Record<StatName, number>
+}
+
+// Model-generated soul — stored in config after first hatch
+export type CompanionSoul = {
+  name: string
+  personality: string
+}
+
+// Full in-memory companion: deterministic bones + generated soul + hatch time.
+export type Companion = CompanionBones &
+  CompanionSoul & {
+    hatchedAt: number
+  }
+
+// What actually persists in config. Bones are regenerated from hash(userId)
+// on every read so species renames don't break stored companions and users
+// can't edit their way to a legendary.
+export type StoredCompanion = CompanionSoul & { hatchedAt: number }
+
+// Relative draw weights per rarity; the five weights sum to 100.
+export const RARITY_WEIGHTS = {
+  common: 60,
+  uncommon: 25,
+  rare: 10,
+  epic: 4,
+  legendary: 1,
+} as const satisfies Record<Rarity, number>
+
+// Star badge shown next to a rarity (1–5 stars).
+export const RARITY_STARS = {
+  common: '★',
+  uncommon: '★★',
+  rare: '★★★',
+  epic: '★★★★',
+  legendary: '★★★★★',
+} as const satisfies Record<Rarity, string>
+
+// Theme color key used when rendering each rarity.
+export const RARITY_COLORS = {
+  common: 'inactive',
+  uncommon: 'success',
+  rare: 'permission',
+  epic: 'autoAccept',
+  legendary: 'warning',
+} as const satisfies Record<Rarity, keyof import('../utils/theme.js').Theme>

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 97 - 0
src/buddy/useBuddyNotification.tsx


+ 31 - 0
src/cli/exit.ts

@@ -0,0 +1,31 @@
+/**
+ * CLI exit helpers for subcommand handlers.
+ *
+ * Consolidates the 4-5 line "print + lint-suppress + exit" block that was
+ * copy-pasted ~60 times across `claude mcp *` / `claude plugin *` handlers.
+ * The `: never` return type lets TypeScript narrow control flow at call sites
+ * without a trailing `return`.
+ */
+/* eslint-disable custom-rules/no-process-exit -- centralized CLI exit point */
+
+// `return undefined as never` (not a post-exit throw) — tests spy on
+// process.exit and let it return. Call sites write `return cliError(...)`
+// where subsequent code would dereference narrowed-away values under mock.
+// cliError uses console.error (tests spy on console.error); cliOk uses
+// process.stdout.write (tests spy on process.stdout.write — Bun's console.log
+// doesn't route through a spied process.stdout.write).
+
+/** Write an error message to stderr (if given) and exit with code 1. */
+export function cliError(msg?: string): never {
+  if (msg) {
+    // biome-ignore lint/suspicious/noConsole: centralized CLI error output
+    console.error(msg)
+  }
+  process.exit(1)
+  return undefined as never
+}
+
+/** Write a message to stdout (if given) and exit with code 0. */
+export function cliOk(msg?: string): never {
+  if (msg) {
+    process.stdout.write(`${msg}\n`)
+  }
+  process.exit(0)
+  return undefined as never
+}

+ 70 - 0
src/cli/handlers/agents.ts

@@ -0,0 +1,70 @@
+/**
+ * Agents subcommand handler — prints the list of configured agents.
+ * Dynamically imported only when `claude agents` runs.
+ */
+
+import {
+  AGENT_SOURCE_GROUPS,
+  compareAgentsByName,
+  getOverrideSourceLabel,
+  type ResolvedAgent,
+  resolveAgentModelDisplay,
+  resolveAgentOverrides,
+} from '../../tools/AgentTool/agentDisplay.js'
+import {
+  getActiveAgentsFromList,
+  getAgentDefinitionsWithOverrides,
+} from '../../tools/AgentTool/loadAgentsDir.js'
+import { getCwd } from '../../utils/cwd.js'
+
+/**
+ * One-line summary for an agent: "type · model · N memory", where the model
+ * and memory parts are included only when present (falsy parts are dropped).
+ */
+function formatAgent(agent: ResolvedAgent): string {
+  const model = resolveAgentModelDisplay(agent)
+  const memory = agent.memory ? `${agent.memory} memory` : ''
+  return [agent.agentType, model, memory].filter(Boolean).join(' · ')
+}
+
+/**
+ * `claude agents` handler: prints configured agents grouped by source.
+ * Shadowed agents (overridden by a higher-precedence source) are listed but
+ * annotated and excluded from the active count. Prints "No agents found."
+ * when every source group is empty.
+ */
+export async function agentsHandler(): Promise<void> {
+  const cwd = getCwd()
+  const { allAgents } = await getAgentDefinitionsWithOverrides(cwd)
+  const activeAgents = getActiveAgentsFromList(allAgents)
+  const resolvedAgents = resolveAgentOverrides(allAgents, activeAgents)
+
+  const lines: string[] = []
+  let totalActive = 0
+
+  for (const { label, source } of AGENT_SOURCE_GROUPS) {
+    const groupAgents = resolvedAgents
+      .filter(a => a.source === source)
+      .sort(compareAgentsByName)
+
+    // Empty groups produce no header at all.
+    if (groupAgents.length === 0) continue
+
+    lines.push(`${label}:`)
+    for (const agent of groupAgents) {
+      if (agent.overriddenBy) {
+        // Shadowed entries are shown for visibility but don't count as active.
+        const winnerSource = getOverrideSourceLabel(agent.overriddenBy)
+        lines.push(`  (shadowed by ${winnerSource}) ${formatAgent(agent)}`)
+      } else {
+        lines.push(`  ${formatAgent(agent)}`)
+        totalActive++
+      }
+    }
+    lines.push('')
+  }
+
+  if (lines.length === 0) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('No agents found.')
+  } else {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(`${totalActive} active agents\n`)
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(lines.join('\n').trimEnd())
+  }
+}

+ 330 - 0
src/cli/handlers/auth.ts

@@ -0,0 +1,330 @@
+/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handler intentionally exits */
+
+import {
+  clearAuthRelatedCaches,
+  performLogout,
+} from '../../commands/logout/logout.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../../services/analytics/index.js'
+import { getSSLErrorHint } from '../../services/api/errorUtils.js'
+import { fetchAndStoreClaudeCodeFirstTokenDate } from '../../services/api/firstTokenDate.js'
+import {
+  createAndStoreApiKey,
+  fetchAndStoreUserRoles,
+  refreshOAuthToken,
+  shouldUseClaudeAIAuth,
+  storeOAuthAccountInfo,
+} from '../../services/oauth/client.js'
+import { getOauthProfileFromOauthToken } from '../../services/oauth/getOauthProfile.js'
+import { OAuthService } from '../../services/oauth/index.js'
+import type { OAuthTokens } from '../../services/oauth/types.js'
+import {
+  clearOAuthTokenCache,
+  getAnthropicApiKeyWithSource,
+  getAuthTokenSource,
+  getOauthAccountInfo,
+  getSubscriptionType,
+  isUsing3PServices,
+  saveOAuthTokensIfNeeded,
+  validateForceLoginOrg,
+} from '../../utils/auth.js'
+import { saveGlobalConfig } from '../../utils/config.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { isRunningOnHomespace } from '../../utils/envUtils.js'
+import { errorMessage } from '../../utils/errors.js'
+import { logError } from '../../utils/log.js'
+import { getAPIProvider } from '../../utils/model/providers.js'
+import { getInitialSettings } from '../../utils/settings/settings.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import {
+  buildAccountProperties,
+  buildAPIProviderProperties,
+} from '../../utils/status.js'
+
+/**
+ * Shared post-token-acquisition logic. Saves tokens, fetches profile/roles,
+ * and sets up the local auth state.
+ *
+ * Order matters: logout (clears stale credentials) → store account info →
+ * persist tokens → invalidate token cache → best-effort role/date fetches →
+ * clear auth-related caches. Throws only for failures that block core auth
+ * (e.g. Console API key creation).
+ */
+export async function installOAuthTokens(tokens: OAuthTokens): Promise<void> {
+  // Clear old state before saving new credentials
+  await performLogout({ clearOnboarding: false })
+
+  // Reuse pre-fetched profile if available, otherwise fetch fresh
+  const profile =
+    tokens.profile ?? (await getOauthProfileFromOauthToken(tokens.accessToken))
+  if (profile) {
+    storeOAuthAccountInfo({
+      accountUuid: profile.account.uuid,
+      emailAddress: profile.account.email,
+      organizationUuid: profile.organization.uuid,
+      displayName: profile.account.display_name || undefined,
+      hasExtraUsageEnabled:
+        profile.organization.has_extra_usage_enabled ?? undefined,
+      billingType: profile.organization.billing_type ?? undefined,
+      subscriptionCreatedAt:
+        profile.organization.subscription_created_at ?? undefined,
+      accountCreatedAt: profile.account.created_at,
+    })
+  } else if (tokens.tokenAccount) {
+    // Fallback to token exchange account data when profile endpoint fails
+    storeOAuthAccountInfo({
+      accountUuid: tokens.tokenAccount.uuid,
+      emailAddress: tokens.tokenAccount.emailAddress,
+      organizationUuid: tokens.tokenAccount.organizationUuid,
+    })
+  }
+
+  const storageResult = saveOAuthTokensIfNeeded(tokens)
+  // Drop any cached token so subsequent reads see the freshly saved one.
+  clearOAuthTokenCache()
+
+  if (storageResult.warning) {
+    logEvent('tengu_oauth_storage_warning', {
+      warning:
+        storageResult.warning as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+  }
+
+  // Roles and first-token-date may fail for limited-scope tokens (e.g.
+  // inference-only from setup-token). They're not required for core auth.
+  await fetchAndStoreUserRoles(tokens.accessToken).catch(err =>
+    logForDebugging(String(err), { level: 'error' }),
+  )
+
+  if (shouldUseClaudeAIAuth(tokens.scopes)) {
+    await fetchAndStoreClaudeCodeFirstTokenDate().catch(err =>
+      logForDebugging(String(err), { level: 'error' }),
+    )
+  } else {
+    // API key creation is critical for Console users — let it throw.
+    const apiKey = await createAndStoreApiKey(tokens.accessToken)
+    if (!apiKey) {
+      throw new Error(
+        'Unable to create API key. The server accepted the request but did not return a key.',
+      )
+    }
+  }
+
+  await clearAuthRelatedCaches()
+}
+
+/**
+ * `claude auth login` handler. Resolves the login target (Console vs
+ * claude.ai), honoring the enterprise forceLoginMethod setting, then runs
+ * either the env-var refresh-token fast path or the interactive browser
+ * OAuth flow. Always terminates the process: exit 0 on success, 1 on
+ * failure or invalid flag combinations.
+ */
+export async function authLogin({
+  email,
+  sso,
+  console: useConsole,
+  claudeai,
+}: {
+  email?: string
+  sso?: boolean
+  console?: boolean
+  claudeai?: boolean
+}): Promise<void> {
+  // Mutually exclusive target flags.
+  if (useConsole && claudeai) {
+    process.stderr.write(
+      'Error: --console and --claudeai cannot be used together.\n',
+    )
+    process.exit(1)
+  }
+
+  const settings = getInitialSettings()
+  // forceLoginMethod is a hard constraint (enterprise setting) — matches ConsoleOAuthFlow behavior.
+  // Without it, --console selects Console; --claudeai (or no flag) selects claude.ai.
+  const loginWithClaudeAi = settings.forceLoginMethod
+    ? settings.forceLoginMethod === 'claudeai'
+    : !useConsole
+  const orgUUID = settings.forceLoginOrgUUID
+
+  // Fast path: if a refresh token is provided via env var, skip the browser
+  // OAuth flow and exchange it directly for tokens.
+  const envRefreshToken = process.env.CLAUDE_CODE_OAUTH_REFRESH_TOKEN
+  if (envRefreshToken) {
+    const envScopes = process.env.CLAUDE_CODE_OAUTH_SCOPES
+    if (!envScopes) {
+      process.stderr.write(
+        'CLAUDE_CODE_OAUTH_SCOPES is required when using CLAUDE_CODE_OAUTH_REFRESH_TOKEN.\n' +
+          'Set it to the space-separated scopes the refresh token was issued with\n' +
+          '(e.g. "user:inference" or "user:profile user:inference user:sessions:claude_code user:mcp_servers").\n',
+      )
+      process.exit(1)
+    }
+
+    const scopes = envScopes.split(/\s+/).filter(Boolean)
+
+    try {
+      logEvent('tengu_login_from_refresh_token', {})
+
+      const tokens = await refreshOAuthToken(envRefreshToken, { scopes })
+      await installOAuthTokens(tokens)
+
+      // Enterprise org pinning: reject tokens for the wrong organization.
+      const orgResult = await validateForceLoginOrg()
+      if (!orgResult.valid) {
+        process.stderr.write(orgResult.message + '\n')
+        process.exit(1)
+      }
+
+      // Mark onboarding complete — interactive paths handle this via
+      // the Onboarding component, but the env var path skips it.
+      saveGlobalConfig(current => {
+        if (current.hasCompletedOnboarding) return current
+        return { ...current, hasCompletedOnboarding: true }
+      })
+
+      logEvent('tengu_oauth_success', {
+        loginWithClaudeAi: shouldUseClaudeAIAuth(tokens.scopes),
+      })
+      process.stdout.write('Login successful.\n')
+      process.exit(0)
+    } catch (err) {
+      logError(err)
+      const sslHint = getSSLErrorHint(err)
+      process.stderr.write(
+        `Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
+      )
+      process.exit(1)
+    }
+  }
+
+  const resolvedLoginMethod = sso ? 'sso' : undefined
+
+  const oauthService = new OAuthService()
+
+  try {
+    logEvent('tengu_oauth_flow_start', { loginWithClaudeAi })
+
+    const result = await oauthService.startOAuthFlow(
+      async url => {
+        process.stdout.write('Opening browser to sign in…\n')
+        process.stdout.write(`If the browser didn't open, visit: ${url}\n`)
+      },
+      {
+        loginWithClaudeAi,
+        loginHint: email,
+        loginMethod: resolvedLoginMethod,
+        orgUUID,
+      },
+    )
+
+    await installOAuthTokens(result)
+
+    const orgResult = await validateForceLoginOrg()
+    if (!orgResult.valid) {
+      process.stderr.write(orgResult.message + '\n')
+      process.exit(1)
+    }
+
+    logEvent('tengu_oauth_success', { loginWithClaudeAi })
+
+    process.stdout.write('Login successful.\n')
+    process.exit(0)
+  } catch (err) {
+    logError(err)
+    const sslHint = getSSLErrorHint(err)
+    process.stderr.write(
+      `Login failed: ${errorMessage(err)}\n${sslHint ? sslHint + '\n' : ''}`,
+    )
+    process.exit(1)
+  } finally {
+    // Always release OAuth flow resources, even after process.exit is queued.
+    oauthService.cleanup()
+  }
+}
+
+/**
+ * `claude auth status` handler. Reports the current auth state either as
+ * human-readable text (opts.text) or JSON (default), then exits 0 when
+ * logged in and 1 otherwise.
+ */
+export async function authStatus(opts: {
+  json?: boolean
+  text?: boolean
+}): Promise<void> {
+  const { source: authTokenSource, hasToken } = getAuthTokenSource()
+  const { source: apiKeySource } = getAnthropicApiKeyWithSource()
+  const hasApiKeyEnvVar =
+    !!process.env.ANTHROPIC_API_KEY && !isRunningOnHomespace()
+  const oauthAccount = getOauthAccountInfo()
+  const subscriptionType = getSubscriptionType()
+  const using3P = isUsing3PServices()
+  // Logged in if ANY credential source is present.
+  const loggedIn =
+    hasToken || apiKeySource !== 'none' || hasApiKeyEnvVar || using3P
+
+  // Determine auth method — first matching branch wins, so third-party
+  // services take precedence, then OAuth token sources, then API keys.
+  let authMethod: string = 'none'
+  if (using3P) {
+    authMethod = 'third_party'
+  } else if (authTokenSource === 'claude.ai') {
+    authMethod = 'claude.ai'
+  } else if (authTokenSource === 'apiKeyHelper') {
+    authMethod = 'api_key_helper'
+  } else if (authTokenSource !== 'none') {
+    authMethod = 'oauth_token'
+  } else if (apiKeySource === 'ANTHROPIC_API_KEY' || hasApiKeyEnvVar) {
+    authMethod = 'api_key'
+  } else if (apiKeySource === '/login managed key') {
+    authMethod = 'claude.ai'
+  }
+
+  if (opts.text) {
+    const properties = [
+      ...buildAccountProperties(),
+      ...buildAPIProviderProperties(),
+    ]
+    let hasAuthProperty = false
+    for (const prop of properties) {
+      // Normalize the property value to a printable string; arrays join with ", ".
+      const value =
+        typeof prop.value === 'string'
+          ? prop.value
+          : Array.isArray(prop.value)
+            ? prop.value.join(', ')
+            : null
+      if (value === null || value === 'none') {
+        continue
+      }
+      hasAuthProperty = true
+      if (prop.label) {
+        process.stdout.write(`${prop.label}: ${value}\n`)
+      } else {
+        process.stdout.write(`${value}\n`)
+      }
+    }
+    // Env-var-only auth yields no properties above; still surface it.
+    if (!hasAuthProperty && hasApiKeyEnvVar) {
+      process.stdout.write('API key: ANTHROPIC_API_KEY\n')
+    }
+    if (!loggedIn) {
+      process.stdout.write(
+        'Not logged in. Run claude auth login to authenticate.\n',
+      )
+    }
+  } else {
+    const apiProvider = getAPIProvider()
+    const resolvedApiKeySource =
+      apiKeySource !== 'none'
+        ? apiKeySource
+        : hasApiKeyEnvVar
+          ? 'ANTHROPIC_API_KEY'
+          : null
+    const output: Record<string, string | boolean | null> = {
+      loggedIn,
+      authMethod,
+      apiProvider,
+    }
+    if (resolvedApiKeySource) {
+      output.apiKeySource = resolvedApiKeySource
+    }
+    // Account details are only meaningful for claude.ai auth.
+    if (authMethod === 'claude.ai') {
+      output.email = oauthAccount?.emailAddress ?? null
+      output.orgId = oauthAccount?.organizationUuid ?? null
+      output.orgName = oauthAccount?.organizationName ?? null
+      output.subscriptionType = subscriptionType ?? null
+    }
+
+    process.stdout.write(jsonStringify(output, null, 2) + '\n')
+  }
+  process.exit(loggedIn ? 0 : 1)
+}
+
+/**
+ * `claude auth logout` handler. Clears stored credentials (onboarding state
+ * is preserved) and exits 0 on success, 1 if logout throws.
+ */
+export async function authLogout(): Promise<void> {
+  try {
+    await performLogout({ clearOnboarding: false })
+  } catch {
+    process.stderr.write('Failed to log out.\n')
+    process.exit(1)
+  }
+  process.stdout.write('Successfully logged out from your Anthropic account.\n')
+  process.exit(0)
+}

+ 170 - 0
src/cli/handlers/autoMode.ts

@@ -0,0 +1,170 @@
+/**
+ * Auto mode subcommand handlers — dump default/merged classifier rules and
+ * critique user-written rules. Dynamically imported when `claude auto-mode ...` runs.
+ */
+
+import { errorMessage } from '../../utils/errors.js'
+import {
+  getMainLoopModel,
+  parseUserSpecifiedModel,
+} from '../../utils/model/model.js'
+import {
+  type AutoModeRules,
+  buildDefaultExternalSystemPrompt,
+  getDefaultExternalAutoModeRules,
+} from '../../utils/permissions/yoloClassifier.js'
+import { getAutoModeConfig } from '../../utils/settings/settings.js'
+import { sideQuery } from '../../utils/sideQuery.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+
+/** Pretty-print a rules object to stdout as 2-space-indented JSON. */
+function writeRules(rules: AutoModeRules): void {
+  const serialized = jsonStringify(rules, null, 2)
+  process.stdout.write(`${serialized}\n`)
+}
+
+/** `claude auto-mode defaults` — print the built-in external classifier rules. */
+export function autoModeDefaultsHandler(): void {
+  writeRules(getDefaultExternalAutoModeRules())
+}
+
+/**
+ * Print the effective auto mode config. Each section independently uses the
+ * user's settings when non-empty and falls back to the external defaults
+ * otherwise — the same per-section REPLACE semantics buildYoloSystemPrompt
+ * applies when resolving the external template.
+ */
+export function autoModeConfigHandler(): void {
+  const config = getAutoModeConfig()
+  const defaults = getDefaultExternalAutoModeRules()
+  // A section counts as "provided" only when it has at least one rule.
+  const pick = (user: string[] | undefined, fallback: string[]): string[] =>
+    user?.length ? user : fallback
+  writeRules({
+    allow: pick(config?.allow, defaults.allow),
+    soft_deny: pick(config?.soft_deny, defaults.soft_deny),
+    environment: pick(config?.environment, defaults.environment),
+  })
+}
+
+// System prompt for the critique side query issued by autoModeCritiqueHandler.
+// Explains the allow / soft_deny / environment rule categories to the reviewer model.
+const CRITIQUE_SYSTEM_PROMPT =
+  'You are an expert reviewer of auto mode classifier rules for Claude Code.\n' +
+  '\n' +
+  'Claude Code has an "auto mode" that uses an AI classifier to decide whether ' +
+  'tool calls should be auto-approved or require user confirmation. Users can ' +
+  'write custom rules in three categories:\n' +
+  '\n' +
+  '- **allow**: Actions the classifier should auto-approve\n' +
+  '- **soft_deny**: Actions the classifier should block (require user confirmation)\n' +
+  "- **environment**: Context about the user's setup that helps the classifier make decisions\n" +
+  '\n' +
+  "Your job is to critique the user's custom rules for clarity, completeness, " +
+  'and potential issues. The classifier is an LLM that reads these rules as ' +
+  'part of its system prompt.\n' +
+  '\n' +
+  'For each rule, evaluate:\n' +
+  '1. **Clarity**: Is the rule unambiguous? Could the classifier misinterpret it?\n' +
+  "2. **Completeness**: Are there gaps or edge cases the rule doesn't cover?\n" +
+  '3. **Conflicts**: Do any of the rules conflict with each other?\n' +
+  '4. **Actionability**: Is the rule specific enough for the classifier to act on?\n' +
+  '\n' +
+  'Be concise and constructive. Only comment on rules that could be improved. ' +
+  'If all rules look good, say so.'
+
+/**
+ * `claude auto-mode critique` handler. Sends the user's custom auto mode
+ * rules, alongside the full classifier system prompt and the defaults they
+ * replace, to a side-query model for review. Prints guidance and returns
+ * early when no custom rules exist; sets exitCode 1 if the query fails.
+ */
+export async function autoModeCritiqueHandler(options: {
+  model?: string
+}): Promise<void> {
+  const config = getAutoModeConfig()
+  const hasCustomRules =
+    (config?.allow?.length ?? 0) > 0 ||
+    (config?.soft_deny?.length ?? 0) > 0 ||
+    (config?.environment?.length ?? 0) > 0
+
+  if (!hasCustomRules) {
+    process.stdout.write(
+      'No custom auto mode rules found.\n\n' +
+        'Add rules to your settings file under autoMode.{allow, soft_deny, environment}.\n' +
+        'Run `claude auto-mode defaults` to see the default rules for reference.\n',
+    )
+    return
+  }
+
+  // --model overrides the main-loop model for the critique query.
+  const model = options.model
+    ? parseUserSpecifiedModel(options.model)
+    : getMainLoopModel()
+
+  const defaults = getDefaultExternalAutoModeRules()
+  const classifierPrompt = buildDefaultExternalSystemPrompt()
+
+  // Each populated section is rendered with both the custom rules and the
+  // defaults they replace, so the reviewer sees what was lost.
+  const userRulesSummary =
+    formatRulesForCritique('allow', config?.allow ?? [], defaults.allow) +
+    formatRulesForCritique(
+      'soft_deny',
+      config?.soft_deny ?? [],
+      defaults.soft_deny,
+    ) +
+    formatRulesForCritique(
+      'environment',
+      config?.environment ?? [],
+      defaults.environment,
+    )
+
+  process.stdout.write('Analyzing your auto mode rules…\n\n')
+
+  let response
+  try {
+    response = await sideQuery({
+      querySource: 'auto_mode_critique',
+      model,
+      system: CRITIQUE_SYSTEM_PROMPT,
+      skipSystemPromptPrefix: true,
+      max_tokens: 4096,
+      messages: [
+        {
+          role: 'user',
+          content:
+            'Here is the full classifier system prompt that the auto mode classifier receives:\n\n' +
+            '<classifier_system_prompt>\n' +
+            classifierPrompt +
+            '\n</classifier_system_prompt>\n\n' +
+            "Here are the user's custom rules that REPLACE the corresponding default sections:\n\n" +
+            userRulesSummary +
+            '\nPlease critique these custom rules.',
+        },
+      ],
+    })
+  } catch (error) {
+    process.stderr.write(
+      'Failed to analyze rules: ' + errorMessage(error) + '\n',
+    )
+    // Soft failure: set exit code instead of exiting, so cleanup can run.
+    process.exitCode = 1
+    return
+  }
+
+  // Print the first text block from the model response, if any.
+  const textBlock = response.content.find(block => block.type === 'text')
+  if (textBlock?.type === 'text') {
+    process.stdout.write(textBlock.text + '\n')
+  } else {
+    process.stdout.write('No critique was generated. Please try again.\n')
+  }
+}
+
+/**
+ * Render one rules section for the critique prompt: a heading, the user's
+ * custom rules as bullets, then the default bullets they replace. Returns
+ * the empty string when the user supplied no rules for the section.
+ */
+function formatRulesForCritique(
+  section: string,
+  userRules: string[],
+  defaultRules: string[],
+): string {
+  if (userRules.length === 0) return ''
+  const bullets = (rules: string[]): string =>
+    rules.map(rule => `- ${rule}`).join('\n')
+  return (
+    `## ${section} (custom rules replacing defaults)\n` +
+    `Custom:\n${bullets(userRules)}\n\n` +
+    `Defaults being replaced:\n${bullets(defaultRules)}\n\n`
+  )
+}

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 361 - 0
src/cli/handlers/mcp.tsx


+ 878 - 0
src/cli/handlers/plugins.ts

@@ -0,0 +1,878 @@
+/**
+ * Plugin and marketplace subcommand handlers — extracted from main.tsx for lazy loading.
+ * These are dynamically imported only when `claude plugin *` or `claude plugin marketplace *` runs.
+ */
+/* eslint-disable custom-rules/no-process-exit -- CLI subcommand handlers intentionally exit */
+import figures from 'figures'
+import { basename, dirname } from 'path'
+import { setUseCoworkPlugins } from '../../bootstrap/state.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+  logEvent,
+} from '../../services/analytics/index.js'
+import {
+  disableAllPlugins,
+  disablePlugin,
+  enablePlugin,
+  installPlugin,
+  uninstallPlugin,
+  updatePluginCli,
+  VALID_INSTALLABLE_SCOPES,
+  VALID_UPDATE_SCOPES,
+} from '../../services/plugins/pluginCliCommands.js'
+import { getPluginErrorMessage } from '../../types/plugin.js'
+import { errorMessage } from '../../utils/errors.js'
+import { logError } from '../../utils/log.js'
+import { clearAllCaches } from '../../utils/plugins/cacheUtils.js'
+import { getInstallCounts } from '../../utils/plugins/installCounts.js'
+import {
+  isPluginInstalled,
+  loadInstalledPluginsV2,
+} from '../../utils/plugins/installedPluginsManager.js'
+import {
+  createPluginId,
+  loadMarketplacesWithGracefulDegradation,
+} from '../../utils/plugins/marketplaceHelpers.js'
+import {
+  addMarketplaceSource,
+  loadKnownMarketplacesConfig,
+  refreshAllMarketplaces,
+  refreshMarketplace,
+  removeMarketplaceSource,
+  saveMarketplaceToSettings,
+} from '../../utils/plugins/marketplaceManager.js'
+import { loadPluginMcpServers } from '../../utils/plugins/mcpPluginIntegration.js'
+import { parseMarketplaceInput } from '../../utils/plugins/parseMarketplaceInput.js'
+import {
+  parsePluginIdentifier,
+  scopeToSettingSource,
+} from '../../utils/plugins/pluginIdentifier.js'
+import { loadAllPlugins } from '../../utils/plugins/pluginLoader.js'
+import type { PluginSource } from '../../utils/plugins/schemas.js'
+import {
+  type ValidationResult,
+  validateManifest,
+  validatePluginContents,
+} from '../../utils/plugins/validatePlugin.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import { plural } from '../../utils/stringUtils.js'
+import { cliError, cliOk } from '../exit.js'
+
+// Re-export for main.tsx to reference in option definitions
+export { VALID_INSTALLABLE_SCOPES, VALID_UPDATE_SCOPES }
+
+/**
+ * Helper function to handle marketplace command errors consistently.
+ * Logs the error for diagnostics, then prints a ✖-prefixed "Failed to
+ * <action>" message and exits 1 via cliError. The `never` return type lets
+ * callers write `return handleMarketplaceError(...)` for flow narrowing.
+ */
+export function handleMarketplaceError(error: unknown, action: string): never {
+  logError(error)
+  cliError(`${figures.cross} Failed to ${action}: ${errorMessage(error)}`)
+}
+
+function printValidationResult(result: ValidationResult): void {
+  if (result.errors.length > 0) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(
+      `${figures.cross} Found ${result.errors.length} ${plural(result.errors.length, 'error')}:\n`,
+    )
+    result.errors.forEach(error => {
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`  ${figures.pointer} ${error.path}: ${error.message}`)
+    })
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('')
+  }
+  if (result.warnings.length > 0) {
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(
+      `${figures.warning} Found ${result.warnings.length} ${plural(result.warnings.length, 'warning')}:\n`,
+    )
+    result.warnings.forEach(warning => {
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`  ${figures.pointer} ${warning.path}: ${warning.message}`)
+    })
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('')
+  }
+}
+
+// plugin validate
+/**
+ * `claude plugin validate` handler. Validates the given manifest and, for a
+ * plugin manifest inside a .claude-plugin directory, the plugin's content
+ * files too. Exit codes: 0 valid (warnings allowed), 1 validation failed,
+ * 2 unexpected error during validation.
+ */
+export async function pluginValidateHandler(
+  manifestPath: string,
+  options: { cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    const result = await validateManifest(manifestPath)
+
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log(`Validating ${result.fileType} manifest: ${result.filePath}\n`)
+    printValidationResult(result)
+
+    // If this is a plugin manifest located inside a .claude-plugin directory,
+    // also validate the plugin's content files (skills, agents, commands,
+    // hooks). Works whether the user passed a directory or the plugin.json
+    // path directly.
+    let contentResults: ValidationResult[] = []
+    if (result.fileType === 'plugin') {
+      const manifestDir = dirname(result.filePath)
+      if (basename(manifestDir) === '.claude-plugin') {
+        // dirname(manifestDir) is the plugin root (parent of .claude-plugin).
+        contentResults = await validatePluginContents(dirname(manifestDir))
+        for (const r of contentResults) {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`Validating ${r.fileType}: ${r.filePath}\n`)
+          printValidationResult(r)
+        }
+      }
+    }
+
+    // Overall success requires the manifest AND every content file to pass.
+    const allSuccess = result.success && contentResults.every(r => r.success)
+    const hasWarnings =
+      result.warnings.length > 0 ||
+      contentResults.some(r => r.warnings.length > 0)
+
+    if (allSuccess) {
+      cliOk(
+        hasWarnings
+          ? `${figures.tick} Validation passed with warnings`
+          : `${figures.tick} Validation passed`,
+      )
+    } else {
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`${figures.cross} Validation failed`)
+      process.exit(1)
+    }
+  } catch (error) {
+    logError(error)
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.error(
+      `${figures.cross} Unexpected error during validation: ${errorMessage(error)}`,
+    )
+    process.exit(2)
+  }
+}
+
// plugin list (lines 5217–5416)
/**
 * `claude plugin list` — lists installed plugins (V2 bookkeeping),
 * session-only plugins supplied via --plugin-dir, and, with --available,
 * not-yet-installed plugins from configured marketplaces.
 *
 * Output modes:
 *  - --json: machine-readable array (object with `installed`/`available`
 *    when --available is also set).
 *  - default: human-readable "Installed plugins" and "Session-only
 *    plugins" sections.
 * --cowork switches to the cowork plugin store before reading anything.
 *
 * NOTE(review): the --json branch does not `return` after cliOk(...).
 * This avoids printing the human-readable sections afterwards only if
 * cliOk terminates the process — sibling handlers in this file (e.g.
 * marketplaceListHandler) rely on the same fall-through. TODO confirm
 * cliOk exits.
 */
export async function pluginListHandler(options: {
  json?: boolean
  available?: boolean
  cowork?: boolean
}): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  logEvent('tengu_plugin_list_command', {})

  // V2 on-disk bookkeeping: installedData.plugins maps pluginId -> list of
  // installations (one per scope/project).
  const installedData = loadInstalledPluginsV2()
  const { getPluginEditableScopes } = await import(
    '../../utils/plugins/pluginStartupCheck.js'
  )
  // Used only via .has(pluginId) below — presumably the set of currently
  // enabled plugin ids, despite the "scopes" in the helper name. TODO confirm.
  const enabledPlugins = getPluginEditableScopes()

  const pluginIds = Object.keys(installedData.plugins)

  // Load all plugins once. The JSON and human paths both need:
  //  - loadErrors (to show load failures per plugin)
  //  - inline plugins (session-only via --plugin-dir, source='name@inline')
  //    which are NOT in installedData.plugins (V2 bookkeeping) — they must
  //    be surfaced separately or `plugin list` silently ignores --plugin-dir.
  const {
    enabled: loadedEnabled,
    disabled: loadedDisabled,
    errors: loadErrors,
  } = await loadAllPlugins()
  const allLoadedPlugins = [...loadedEnabled, ...loadedDisabled]
  const inlinePlugins = allLoadedPlugins.filter(p =>
    p.source.endsWith('@inline'),
  )
  // Path-level inline failures (dir doesn't exist, parse error before
  // manifest is read) use source='inline[N]'. Plugin-level errors after
  // manifest read use source='name@inline'. Collect both for the session
  // section — these are otherwise invisible since they have no pluginId.
  const inlineLoadErrors = loadErrors.filter(
    e => e.source.endsWith('@inline') || e.source.startsWith('inline['),
  )

  if (options.json) {
    // Create a map of plugin source to loaded plugin for quick lookup
    const loadedPluginMap = new Map(allLoadedPlugins.map(p => [p.source, p]))

    const plugins: Array<{
      id: string
      version: string
      scope: string
      enabled: boolean
      installPath: string
      installedAt?: string
      lastUpdated?: string
      projectPath?: string
      mcpServers?: Record<string, unknown>
      errors?: string[]
    }> = []

    for (const pluginId of pluginIds.sort()) {
      const installations = installedData.plugins[pluginId]
      if (!installations || installations.length === 0) continue

      // Find loading errors for this plugin
      const pluginName = parsePluginIdentifier(pluginId).name
      const pluginErrors = loadErrors
        .filter(
          e =>
            e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
        )
        .map(getPluginErrorMessage)

      for (const installation of installations) {
        // Try to find the loaded plugin to get MCP servers.
        // Lookup is by pluginId — assumes loadedPlugin.source equals the
        // installed pluginId for non-inline plugins. TODO confirm.
        const loadedPlugin = loadedPluginMap.get(pluginId)
        let mcpServers: Record<string, unknown> | undefined

        if (loadedPlugin) {
          // Load MCP servers if not already cached.
          // NOTE(review): re-awaited once per installation of the same
          // plugin — presumably loadPluginMcpServers caches; confirm.
          const servers =
            loadedPlugin.mcpServers ||
            (await loadPluginMcpServers(loadedPlugin))
          if (servers && Object.keys(servers).length > 0) {
            mcpServers = servers
          }
        }

        plugins.push({
          id: pluginId,
          version: installation.version || 'unknown',
          scope: installation.scope,
          enabled: enabledPlugins.has(pluginId),
          installPath: installation.installPath,
          installedAt: installation.installedAt,
          lastUpdated: installation.lastUpdated,
          projectPath: installation.projectPath,
          mcpServers,
          errors: pluginErrors.length > 0 ? pluginErrors : undefined,
        })
      }
    }

    // Session-only plugins: scope='session', no install metadata.
    // Filter from inlineLoadErrors (not loadErrors) so an installed plugin
    // with the same manifest name doesn't cross-contaminate via e.plugin.
    // The e.plugin fallback catches the dirName≠manifestName case:
    // createPluginFromPath tags errors with `${dirName}@inline` but
    // plugin.source is reassigned to `${manifest.name}@inline` afterward
    // (pluginLoader.ts loadInlinePlugins), so e.source !== p.source when
    // a dev checkout dir like ~/code/my-fork/ has manifest name 'cool-plugin'.
    for (const p of inlinePlugins) {
      const servers = p.mcpServers || (await loadPluginMcpServers(p))
      const pErrors = inlineLoadErrors
        .filter(
          e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
        )
        .map(getPluginErrorMessage)
      plugins.push({
        id: p.source,
        version: p.manifest.version ?? 'unknown',
        scope: 'session',
        enabled: p.enabled !== false,
        installPath: p.path,
        mcpServers:
          servers && Object.keys(servers).length > 0 ? servers : undefined,
        errors: pErrors.length > 0 ? pErrors : undefined,
      })
    }
    // Path-level inline failures (--plugin-dir /nonexistent): no LoadedPlugin
    // exists so the loop above can't surface them. Mirror the human-path
    // handling so JSON consumers see the failure instead of silent omission.
    for (const e of inlineLoadErrors.filter(e =>
      e.source.startsWith('inline['),
    )) {
      plugins.push({
        id: e.source,
        version: 'unknown',
        scope: 'session',
        enabled: false,
        installPath: 'path' in e ? e.path : '',
        errors: [getPluginErrorMessage(e)],
      })
    }

    // If --available is set, also load available plugins from marketplaces
    if (options.available) {
      const available: Array<{
        pluginId: string
        name: string
        description?: string
        marketplaceName: string
        version?: string
        source: PluginSource
        installCount?: number
      }> = []

      try {
        const [config, installCounts] = await Promise.all([
          loadKnownMarketplacesConfig(),
          getInstallCounts(),
        ])
        const { marketplaces } =
          await loadMarketplacesWithGracefulDegradation(config)

        for (const {
          name: marketplaceName,
          data: marketplace,
        } of marketplaces) {
          if (marketplace) {
            for (const entry of marketplace.plugins) {
              const pluginId = createPluginId(entry.name, marketplaceName)
              // Only include plugins that are not already installed
              if (!isPluginInstalled(pluginId)) {
                available.push({
                  pluginId,
                  name: entry.name,
                  description: entry.description,
                  marketplaceName,
                  version: entry.version,
                  source: entry.source,
                  installCount: installCounts?.get(pluginId),
                })
              }
            }
          }
        }
      } catch {
        // Silently ignore marketplace loading errors
      }

      cliOk(jsonStringify({ installed: plugins, available }, null, 2))
    } else {
      cliOk(jsonStringify(plugins, null, 2))
    }
    // Intentionally no `return` here: relies on cliOk terminating the
    // process; otherwise the human-readable sections below would also
    // print. See the docstring note — TODO confirm.
  }

  if (pluginIds.length === 0 && inlinePlugins.length === 0) {
    // inlineLoadErrors can exist with zero inline plugins (e.g. --plugin-dir
    // points at a nonexistent path). Don't early-exit over them — fall
    // through to the session section so the failure is visible.
    if (inlineLoadErrors.length === 0) {
      cliOk(
        'No plugins installed. Use `claude plugin install` to install a plugin.',
      )
    }
  }

  if (pluginIds.length > 0) {
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('Installed plugins:\n')
  }

  for (const pluginId of pluginIds.sort()) {
    const installations = installedData.plugins[pluginId]
    if (!installations || installations.length === 0) continue

    // Find loading errors for this plugin
    const pluginName = parsePluginIdentifier(pluginId).name
    const pluginErrors = loadErrors.filter(
      e => e.source === pluginId || ('plugin' in e && e.plugin === pluginName),
    )

    for (const installation of installations) {
      const isEnabled = enabledPlugins.has(pluginId)
      const status =
        pluginErrors.length > 0
          ? `${figures.cross} failed to load`
          : isEnabled
            ? `${figures.tick} enabled`
            : `${figures.cross} disabled`
      const version = installation.version || 'unknown'
      const scope = installation.scope

      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`  ${figures.pointer} ${pluginId}`)
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`    Version: ${version}`)
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`    Scope: ${scope}`)
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`    Status: ${status}`)
      for (const error of pluginErrors) {
        // biome-ignore lint/suspicious/noConsole:: intentional console output
        console.log(`    Error: ${getPluginErrorMessage(error)}`)
      }
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log('')
    }
  }

  if (inlinePlugins.length > 0 || inlineLoadErrors.length > 0) {
    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('Session-only plugins (--plugin-dir):\n')
    for (const p of inlinePlugins) {
      // Same dirName≠manifestName fallback as the JSON path above — error
      // sources use the dir basename but p.source uses the manifest name.
      const pErrors = inlineLoadErrors.filter(
        e => e.source === p.source || ('plugin' in e && e.plugin === p.name),
      )
      const status =
        pErrors.length > 0
          ? `${figures.cross} loaded with errors`
          : `${figures.tick} loaded`
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`  ${figures.pointer} ${p.source}`)
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`    Version: ${p.manifest.version ?? 'unknown'}`)
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`    Path: ${p.path}`)
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(`    Status: ${status}`)
      for (const e of pErrors) {
        // biome-ignore lint/suspicious/noConsole:: intentional console output
        console.log(`    Error: ${getPluginErrorMessage(e)}`)
      }
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log('')
    }
    // Path-level failures: no LoadedPlugin object exists. Show them so
    // `--plugin-dir /typo` doesn't just silently produce nothing.
    for (const e of inlineLoadErrors.filter(e =>
      e.source.startsWith('inline['),
    )) {
      // biome-ignore lint/suspicious/noConsole:: intentional console output
      console.log(
        `  ${figures.pointer} ${e.source}: ${figures.cross} ${getPluginErrorMessage(e)}\n`,
      )
    }
  }

  // Success exit with no trailing message (see cliOk note in docstring).
  cliOk()
}
+
// marketplace add (lines 5433–5487)
/**
 * `claude marketplace add <source>` — materializes a marketplace from a
 * GitHub slug (owner/repo), git/https URL, or local path, then records
 * the intent in settings at the requested scope (user/project/local,
 * default user). --sparse limits the checkout to specific paths and is
 * only valid for github/git sources.
 *
 * NOTE(review): the early `if (!parsed) cliError(...)` guard only works
 * if cliError never returns (exits or throws) — `parsed` is dereferenced
 * immediately afterwards. Confirm cliError is typed `never`.
 */
export async function marketplaceAddHandler(
  source: string,
  options: { cowork?: boolean; sparse?: string[]; scope?: string },
): Promise<void> {
  if (options.cowork) setUseCoworkPlugins(true)
  try {
    const parsed = await parseMarketplaceInput(source)

    if (!parsed) {
      cliError(
        `${figures.cross} Invalid marketplace source format. Try: owner/repo, https://..., or ./path`,
      )
    }

    // Parsing can also "succeed" with an embedded error payload.
    if ('error' in parsed) {
      cliError(`${figures.cross} ${parsed.error}`)
    }

    // Validate scope
    const scope = options.scope ?? 'user'
    if (scope !== 'user' && scope !== 'project' && scope !== 'local') {
      cliError(
        `${figures.cross} Invalid scope '${scope}'. Use: user, project, or local`,
      )
    }
    const settingSource = scopeToSettingSource(scope)

    let marketplaceSource = parsed

    // --sparse is attached to the source object (copy, not mutation) and
    // only makes sense for repository-backed sources.
    if (options.sparse && options.sparse.length > 0) {
      if (
        marketplaceSource.source === 'github' ||
        marketplaceSource.source === 'git'
      ) {
        marketplaceSource = {
          ...marketplaceSource,
          sparsePaths: options.sparse,
        }
      } else {
        cliError(
          `${figures.cross} --sparse is only supported for github and git marketplace sources (got: ${marketplaceSource.source})`,
        )
      }
    }

    // biome-ignore lint/suspicious/noConsole:: intentional console output
    console.log('Adding marketplace...')

    const { name, alreadyMaterialized, resolvedSource } =
      await addMarketplaceSource(marketplaceSource, message => {
        // biome-ignore lint/suspicious/noConsole:: intentional console output
        console.log(message)
      })

    // Write intent to settings at the requested scope
    saveMarketplaceToSettings(name, { source: resolvedSource }, settingSource)

    clearAllCaches()

    // For GitHub sources the repo slug (owner/repo) is logged as the
    // source_type. NOTE(review): looks intentional given the explicit
    // non-PII cast, but confirm repo slugs are acceptable in this column.
    let sourceType = marketplaceSource.source
    if (marketplaceSource.source === 'github') {
      sourceType =
        marketplaceSource.repo as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
    }
    logEvent('tengu_marketplace_added', {
      source_type:
        sourceType as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
    })

    cliOk(
      alreadyMaterialized
        ? `${figures.tick} Marketplace '${name}' already on disk — declared in ${scope} settings`
        : `${figures.tick} Successfully added marketplace: ${name} (declared in ${scope} settings)`,
    )
  } catch (error) {
    handleMarketplaceError(error, 'add marketplace')
  }
}
+
+// marketplace list (lines 5497–5565)
+export async function marketplaceListHandler(options: {
+  json?: boolean
+  cowork?: boolean
+}): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    const config = await loadKnownMarketplacesConfig()
+    const names = Object.keys(config)
+
+    if (options.json) {
+      const marketplaces = names.sort().map(name => {
+        const marketplace = config[name]
+        const source = marketplace?.source
+        return {
+          name,
+          source: source?.source,
+          ...(source?.source === 'github' && { repo: source.repo }),
+          ...(source?.source === 'git' && { url: source.url }),
+          ...(source?.source === 'url' && { url: source.url }),
+          ...(source?.source === 'directory' && { path: source.path }),
+          ...(source?.source === 'file' && { path: source.path }),
+          installLocation: marketplace?.installLocation,
+        }
+      })
+      cliOk(jsonStringify(marketplaces, null, 2))
+    }
+
+    if (names.length === 0) {
+      cliOk('No marketplaces configured')
+    }
+
+    // biome-ignore lint/suspicious/noConsole:: intentional console output
+    console.log('Configured marketplaces:\n')
+    names.forEach(name => {
+      const marketplace = config[name]
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`  ${figures.pointer} ${name}`)
+
+      if (marketplace?.source) {
+        const src = marketplace.source
+        if (src.source === 'github') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: GitHub (${src.repo})`)
+        } else if (src.source === 'git') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: Git (${src.url})`)
+        } else if (src.source === 'url') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: URL (${src.url})`)
+        } else if (src.source === 'directory') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: Directory (${src.path})`)
+        } else if (src.source === 'file') {
+          // biome-ignore lint/suspicious/noConsole:: intentional console output
+          console.log(`    Source: File (${src.path})`)
+        }
+      }
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log('')
+    })
+
+    cliOk()
+  } catch (error) {
+    handleMarketplaceError(error, 'list marketplaces')
+  }
+}
+
+// marketplace remove (lines 5576–5598)
+export async function marketplaceRemoveHandler(
+  name: string,
+  options: { cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    await removeMarketplaceSource(name)
+    clearAllCaches()
+
+    logEvent('tengu_marketplace_removed', {
+      marketplace_name:
+        name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+
+    cliOk(`${figures.tick} Successfully removed marketplace: ${name}`)
+  } catch (error) {
+    handleMarketplaceError(error, 'remove marketplace')
+  }
+}
+
+// marketplace update (lines 5609–5672)
+export async function marketplaceUpdateHandler(
+  name: string | undefined,
+  options: { cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  try {
+    if (name) {
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`Updating marketplace: ${name}...`)
+
+      await refreshMarketplace(name, message => {
+        // biome-ignore lint/suspicious/noConsole:: intentional console output
+        console.log(message)
+      })
+
+      clearAllCaches()
+
+      logEvent('tengu_marketplace_updated', {
+        marketplace_name:
+          name as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+
+      cliOk(`${figures.tick} Successfully updated marketplace: ${name}`)
+    } else {
+      const config = await loadKnownMarketplacesConfig()
+      const marketplaceNames = Object.keys(config)
+
+      if (marketplaceNames.length === 0) {
+        cliOk('No marketplaces configured')
+      }
+
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.log(`Updating ${marketplaceNames.length} marketplace(s)...`)
+
+      await refreshAllMarketplaces()
+      clearAllCaches()
+
+      logEvent('tengu_marketplace_updated_all', {
+        count:
+          marketplaceNames.length as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      })
+
+      cliOk(
+        `${figures.tick} Successfully updated ${marketplaceNames.length} marketplace(s)`,
+      )
+    }
+  } catch (error) {
+    handleMarketplaceError(error, 'update marketplace(s)')
+  }
+}
+
+// plugin install (lines 5690–5721)
+export async function pluginInstallHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  const scope = options.scope || 'user'
+  if (options.cowork && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+  if (
+    !VALID_INSTALLABLE_SCOPES.includes(
+      scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+    )
+  ) {
+    cliError(
+      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
+    )
+  }
+  // _PROTO_* routes to PII-tagged plugin_name/marketplace_name BQ columns.
+  // Unredacted plugin arg was previously logged to general-access
+  // additional_metadata for all users — dropped in favor of the privileged
+  // column route. marketplace may be undefined (fires before resolution).
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_install_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await installPlugin(plugin, scope as 'user' | 'project' | 'local')
+}
+
+// plugin uninstall (lines 5738–5769)
+export async function pluginUninstallHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean; keepData?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  const scope = options.scope || 'user'
+  if (options.cowork && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+  if (
+    !VALID_INSTALLABLE_SCOPES.includes(
+      scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+    )
+  ) {
+    cliError(
+      `Invalid scope: ${scope}. Must be one of: ${VALID_INSTALLABLE_SCOPES.join(', ')}.`,
+    )
+  }
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_uninstall_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: scope as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await uninstallPlugin(
+    plugin,
+    scope as 'user' | 'project' | 'local',
+    options.keepData,
+  )
+}
+
+// plugin enable (lines 5783–5818)
+export async function pluginEnableHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  let scope: (typeof VALID_INSTALLABLE_SCOPES)[number] | undefined
+  if (options.scope) {
+    if (
+      !VALID_INSTALLABLE_SCOPES.includes(
+        options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+      )
+    ) {
+      cliError(
+        `Invalid scope "${options.scope}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
+      )
+    }
+    scope = options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number]
+  }
+  if (options.cowork && scope !== undefined && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+
+  // --cowork always operates at user scope
+  if (options.cowork && scope === undefined) {
+    scope = 'user'
+  }
+
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_enable_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: (scope ??
+      'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await enablePlugin(plugin, scope)
+}
+
+// plugin disable (lines 5833–5902)
+export async function pluginDisableHandler(
+  plugin: string | undefined,
+  options: { scope?: string; cowork?: boolean; all?: boolean },
+): Promise<void> {
+  if (options.all && plugin) {
+    cliError('Cannot use --all with a specific plugin')
+  }
+
+  if (!options.all && !plugin) {
+    cliError('Please specify a plugin name or use --all to disable all plugins')
+  }
+
+  if (options.cowork) setUseCoworkPlugins(true)
+
+  if (options.all) {
+    if (options.scope) {
+      cliError('Cannot use --scope with --all')
+    }
+
+    // No _PROTO_plugin_name here — --all disables all plugins.
+    // Distinguishable from the specific-plugin branch by plugin_name IS NULL.
+    logEvent('tengu_plugin_disable_command', {})
+
+    await disableAllPlugins()
+    return
+  }
+
+  let scope: (typeof VALID_INSTALLABLE_SCOPES)[number] | undefined
+  if (options.scope) {
+    if (
+      !VALID_INSTALLABLE_SCOPES.includes(
+        options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number],
+      )
+    ) {
+      cliError(
+        `Invalid scope "${options.scope}". Valid scopes: ${VALID_INSTALLABLE_SCOPES.join(', ')}`,
+      )
+    }
+    scope = options.scope as (typeof VALID_INSTALLABLE_SCOPES)[number]
+  }
+  if (options.cowork && scope !== undefined && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+
+  // --cowork always operates at user scope
+  if (options.cowork && scope === undefined) {
+    scope = 'user'
+  }
+
+  const { name, marketplace } = parsePluginIdentifier(plugin!)
+  logEvent('tengu_plugin_disable_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+    scope: (scope ??
+      'auto') as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  })
+
+  await disablePlugin(plugin!, scope)
+}
+
+// plugin update (lines 5918–5948)
+export async function pluginUpdateHandler(
+  plugin: string,
+  options: { scope?: string; cowork?: boolean },
+): Promise<void> {
+  if (options.cowork) setUseCoworkPlugins(true)
+  const { name, marketplace } = parsePluginIdentifier(plugin)
+  logEvent('tengu_plugin_update_command', {
+    _PROTO_plugin_name: name as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    ...(marketplace && {
+      _PROTO_marketplace_name:
+        marketplace as AnalyticsMetadata_I_VERIFIED_THIS_IS_PII_TAGGED,
+    }),
+  })
+
+  let scope: (typeof VALID_UPDATE_SCOPES)[number] = 'user'
+  if (options.scope) {
+    if (
+      !VALID_UPDATE_SCOPES.includes(
+        options.scope as (typeof VALID_UPDATE_SCOPES)[number],
+      )
+    ) {
+      cliError(
+        `Invalid scope "${options.scope}". Valid scopes: ${VALID_UPDATE_SCOPES.join(', ')}`,
+      )
+    }
+    scope = options.scope as (typeof VALID_UPDATE_SCOPES)[number]
+  }
+  if (options.cowork && scope !== 'user') {
+    cliError('--cowork can only be used with user scope')
+  }
+
+  await updatePluginCli(plugin, scope)
+}

파일 크기가 너무 크기 때문에 변경 상태를 표시하지 않습니다.
+ 109 - 0
src/cli/handlers/util.tsx


+ 32 - 0
src/cli/ndjsonSafeStringify.ts

@@ -0,0 +1,32 @@
+import { jsonStringify } from '../utils/slowOperations.js'
+
+// JSON.stringify emits U+2028/U+2029 raw (valid per ECMA-404). When the
+// output is a single NDJSON line, any receiver that uses JavaScript
+// line-terminator semantics (ECMA-262 §11.3 — \n \r U+2028 U+2029) to
+// split the stream will cut the JSON mid-string. ProcessTransport now
+// silently skips non-JSON lines rather than crashing (gh-28405), but
+// the truncated fragment is still lost — the message is silently dropped.
+//
+// The \uXXXX form is equivalent JSON (parses to the same string) but
+// can never be mistaken for a line terminator by ANY receiver. This is
+// what ES2019's "Subsume JSON" proposal and Node's util.inspect do.
+//
+// Single regex with alternation: the callback's one dispatch per match
+// is cheaper than two full-string scans.
+const JS_LINE_TERMINATORS = /\u2028|\u2029/g
+
+function escapeJsLineTerminators(json: string): string {
+  return json.replace(JS_LINE_TERMINATORS, c =>
+    c === '\u2028' ? '\\u2028' : '\\u2029',
+  )
+}
+
+/**
+ * JSON.stringify for one-message-per-line transports. Escapes U+2028
+ * LINE SEPARATOR and U+2029 PARAGRAPH SEPARATOR so the serialized output
+ * cannot be broken by a line-splitting receiver. Output is still valid
+ * JSON and parses to the same value.
+ */
+export function ndjsonSafeStringify(value: unknown): string {
+  return escapeJsLineTerminators(jsonStringify(value))
+}

+ 5594 - 0
src/cli/print.ts

@@ -0,0 +1,5594 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+import { feature } from 'bun:bundle'
+import { readFile, stat } from 'fs/promises'
+import { dirname } from 'path'
+import {
+  downloadUserSettings,
+  redownloadUserSettings,
+} from 'src/services/settingsSync/index.js'
+import { waitForRemoteManagedSettingsToLoad } from 'src/services/remoteManagedSettings/index.js'
+import { StructuredIO } from 'src/cli/structuredIO.js'
+import { RemoteIO } from 'src/cli/remoteIO.js'
+import {
+  type Command,
+  formatDescriptionWithSource,
+  getCommandName,
+} from 'src/commands.js'
+import { createStreamlinedTransformer } from 'src/utils/streamlinedTransform.js'
+import { installStreamJsonStdoutGuard } from 'src/utils/streamJsonStdoutGuard.js'
+import type { ToolPermissionContext } from 'src/Tool.js'
+import type { ThinkingConfig } from 'src/utils/thinking.js'
+import { assembleToolPool, filterToolsByDenyRules } from 'src/tools.js'
+import uniqBy from 'lodash-es/uniqBy.js'
+import { uniq } from 'src/utils/array.js'
+import { mergeAndFilterTools } from 'src/utils/toolPool.js'
+import {
+  logEvent,
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+} from 'src/services/analytics/index.js'
+import { getFeatureValue_CACHED_MAY_BE_STALE } from 'src/services/analytics/growthbook.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import {
+  logForDiagnosticsNoPII,
+  withDiagnosticsTiming,
+} from 'src/utils/diagLogs.js'
+import { toolMatchesName, type Tool, type Tools } from 'src/Tool.js'
+import {
+  type AgentDefinition,
+  isBuiltInAgent,
+  parseAgentsFromJson,
+} from 'src/tools/AgentTool/loadAgentsDir.js'
+import type { Message, NormalizedUserMessage } from 'src/types/message.js'
+import type { QueuedCommand } from 'src/types/textInputTypes.js'
+import {
+  dequeue,
+  dequeueAllMatching,
+  enqueue,
+  hasCommandsInQueue,
+  peek,
+  subscribeToCommandQueue,
+  getCommandsByMaxPriority,
+} from 'src/utils/messageQueueManager.js'
+import { notifyCommandLifecycle } from 'src/utils/commandLifecycle.js'
+import {
+  getSessionState,
+  notifySessionStateChanged,
+  notifySessionMetadataChanged,
+  setPermissionModeChangedListener,
+  type RequiresActionDetails,
+  type SessionExternalMetadata,
+} from 'src/utils/sessionState.js'
+import { externalMetadataToAppState } from 'src/state/onChangeAppState.js'
+import { getInMemoryErrors, logError, logMCPDebug } from 'src/utils/log.js'
+import {
+  writeToStdout,
+  registerProcessOutputErrorHandlers,
+} from 'src/utils/process.js'
+import type { Stream } from 'src/utils/stream.js'
+import { EMPTY_USAGE } from 'src/services/api/logging.js'
+import {
+  loadConversationForResume,
+  type TurnInterruptionState,
+} from 'src/utils/conversationRecovery.js'
+import type {
+  MCPServerConnection,
+  McpSdkServerConfig,
+  ScopedMcpServerConfig,
+} from 'src/services/mcp/types.js'
+import {
+  ChannelMessageNotificationSchema,
+  gateChannelServer,
+  wrapChannelMessage,
+  findChannelEntry,
+} from 'src/services/mcp/channelNotification.js'
+import {
+  isChannelAllowlisted,
+  isChannelsEnabled,
+} from 'src/services/mcp/channelAllowlist.js'
+import { parsePluginIdentifier } from 'src/utils/plugins/pluginIdentifier.js'
+import { validateUuid } from 'src/utils/uuid.js'
+import { fromArray } from 'src/utils/generators.js'
+import { ask } from 'src/QueryEngine.js'
+import type { PermissionPromptTool } from 'src/utils/queryHelpers.js'
+import {
+  createFileStateCacheWithSizeLimit,
+  mergeFileStateCaches,
+  READ_FILE_STATE_CACHE_SIZE,
+} from 'src/utils/fileStateCache.js'
+import { expandPath } from 'src/utils/path.js'
+import { extractReadFilesFromMessages } from 'src/utils/queryHelpers.js'
+import { registerHookEventHandler } from 'src/utils/hooks/hookEvents.js'
+import { executeFilePersistence } from 'src/utils/filePersistence/filePersistence.js'
+import { finalizePendingAsyncHooks } from 'src/utils/hooks/AsyncHookRegistry.js'
+import {
+  gracefulShutdown,
+  gracefulShutdownSync,
+  isShuttingDown,
+} from 'src/utils/gracefulShutdown.js'
+import { registerCleanup } from 'src/utils/cleanupRegistry.js'
+import { createIdleTimeoutManager } from 'src/utils/idleTimeout.js'
+import type {
+  SDKStatus,
+  ModelInfo,
+  SDKMessage,
+  SDKUserMessage,
+  SDKUserMessageReplay,
+  PermissionResult,
+  McpServerConfigForProcessTransport,
+  McpServerStatus,
+  RewindFilesResult,
+} from 'src/entrypoints/agentSdkTypes.js'
+import type {
+  StdoutMessage,
+  SDKControlInitializeRequest,
+  SDKControlInitializeResponse,
+  SDKControlRequest,
+  SDKControlResponse,
+  SDKControlMcpSetServersResponse,
+  SDKControlReloadPluginsResponse,
+} from 'src/entrypoints/sdk/controlTypes.js'
+import type { PermissionMode } from '@anthropic-ai/claude-agent-sdk'
+import type { PermissionMode as InternalPermissionMode } from 'src/types/permissions.js'
+import { cwd } from 'process'
+import { getCwd } from 'src/utils/cwd.js'
+import omit from 'lodash-es/omit.js'
+import reject from 'lodash-es/reject.js'
+import { isPolicyAllowed } from 'src/services/policyLimits/index.js'
+import type { ReplBridgeHandle } from 'src/bridge/replBridge.js'
+import { getRemoteSessionUrl } from 'src/constants/product.js'
+import { buildBridgeConnectUrl } from 'src/bridge/bridgeStatusUtil.js'
+import { extractInboundMessageFields } from 'src/bridge/inboundMessages.js'
+import { resolveAndPrepend } from 'src/bridge/inboundAttachments.js'
+import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
+import { hasPermissionsToUseTool } from 'src/utils/permissions/permissions.js'
+import { safeParseJSON } from 'src/utils/json.js'
+import {
+  outputSchema as permissionToolOutputSchema,
+  permissionPromptToolResultToPermissionDecision,
+} from 'src/utils/permissions/PermissionPromptToolResultSchema.js'
+import { createAbortController } from 'src/utils/abortController.js'
+import { createCombinedAbortSignal } from 'src/utils/combinedAbortSignal.js'
+import { generateSessionTitle } from 'src/utils/sessionTitle.js'
+import { buildSideQuestionFallbackParams } from 'src/utils/queryContext.js'
+import { runSideQuestion } from 'src/utils/sideQuestion.js'
+import {
+  processSessionStartHooks,
+  processSetupHooks,
+  takeInitialUserMessage,
+} from 'src/utils/sessionStart.js'
+import {
+  DEFAULT_OUTPUT_STYLE_NAME,
+  getAllOutputStyles,
+} from 'src/constants/outputStyles.js'
+import { TEAMMATE_MESSAGE_TAG, TICK_TAG } from 'src/constants/xml.js'
+import {
+  getSettings_DEPRECATED,
+  getSettingsWithSources,
+} from 'src/utils/settings/settings.js'
+import { settingsChangeDetector } from 'src/utils/settings/changeDetector.js'
+import { applySettingsChange } from 'src/utils/settings/applySettingsChange.js'
+import {
+  isFastModeAvailable,
+  isFastModeEnabled,
+  isFastModeSupportedByModel,
+  getFastModeState,
+} from 'src/utils/fastMode.js'
+import {
+  isAutoModeGateEnabled,
+  getAutoModeUnavailableNotification,
+  getAutoModeUnavailableReason,
+  isBypassPermissionsModeDisabled,
+  transitionPermissionMode,
+} from 'src/utils/permissions/permissionSetup.js'
+import {
+  tryGenerateSuggestion,
+  logSuggestionOutcome,
+  logSuggestionSuppressed,
+  type PromptVariant,
+} from 'src/services/PromptSuggestion/promptSuggestion.js'
+import { getLastCacheSafeParams } from 'src/utils/forkedAgent.js'
+import { getAccountInformation } from 'src/utils/auth.js'
+import { OAuthService } from 'src/services/oauth/index.js'
+import { installOAuthTokens } from 'src/cli/handlers/auth.js'
+import { getAPIProvider } from 'src/utils/model/providers.js'
+import type { HookCallbackMatcher } from 'src/types/hooks.js'
+import { AwsAuthStatusManager } from 'src/utils/awsAuthStatusManager.js'
+import type { HookEvent } from 'src/entrypoints/agentSdkTypes.js'
+import {
+  registerHookCallbacks,
+  setInitJsonSchema,
+  getInitJsonSchema,
+  setSdkAgentProgressSummariesEnabled,
+} from 'src/bootstrap/state.js'
+import { createSyntheticOutputTool } from 'src/tools/SyntheticOutputTool/SyntheticOutputTool.js'
+import { parseSessionIdentifier } from 'src/utils/sessionUrl.js'
+import {
+  hydrateRemoteSession,
+  hydrateFromCCRv2InternalEvents,
+  resetSessionFilePointer,
+  doesMessageExistInSession,
+  findUnresolvedToolUse,
+  recordAttributionSnapshot,
+  saveAgentSetting,
+  saveMode,
+  saveAiGeneratedTitle,
+  restoreSessionMetadata,
+} from 'src/utils/sessionStorage.js'
+import { incrementPromptCount } from 'src/utils/commitAttribution.js'
+import {
+  setupSdkMcpClients,
+  connectToServer,
+  clearServerCache,
+  fetchToolsForClient,
+  areMcpConfigsEqual,
+  reconnectMcpServerImpl,
+} from 'src/services/mcp/client.js'
+import {
+  filterMcpServersByPolicy,
+  getMcpConfigByName,
+  isMcpServerDisabled,
+  setMcpServerEnabled,
+} from 'src/services/mcp/config.js'
+import {
+  performMCPOAuthFlow,
+  revokeServerTokens,
+} from 'src/services/mcp/auth.js'
+import {
+  runElicitationHooks,
+  runElicitationResultHooks,
+} from 'src/services/mcp/elicitationHandler.js'
+import { executeNotificationHooks } from 'src/utils/hooks.js'
+import {
+  ElicitRequestSchema,
+  ElicitationCompleteNotificationSchema,
+} from '@modelcontextprotocol/sdk/types.js'
+import { getMcpPrefix } from 'src/services/mcp/mcpStringUtils.js'
+import {
+  commandBelongsToServer,
+  filterToolsByServer,
+} from 'src/services/mcp/utils.js'
+import { setupVscodeSdkMcp } from 'src/services/mcp/vscodeSdkMcp.js'
+import { getAllMcpConfigs } from 'src/services/mcp/config.js'
+import {
+  isQualifiedForGrove,
+  checkGroveForNonInteractive,
+} from 'src/services/api/grove.js'
+import {
+  toInternalMessages,
+  toSDKRateLimitInfo,
+} from 'src/utils/messages/mappers.js'
+import { createModelSwitchBreadcrumbs } from 'src/utils/messages.js'
+import { collectContextData } from 'src/commands/context/context-noninteractive.js'
+import { LOCAL_COMMAND_STDOUT_TAG } from 'src/constants/xml.js'
+import {
+  statusListeners,
+  type ClaudeAILimits,
+} from 'src/services/claudeAiLimits.js'
+import {
+  getDefaultMainLoopModel,
+  getMainLoopModel,
+  modelDisplayString,
+  parseUserSpecifiedModel,
+} from 'src/utils/model/model.js'
+import { getModelOptions } from 'src/utils/model/modelOptions.js'
+import {
+  modelSupportsEffort,
+  modelSupportsMaxEffort,
+  EFFORT_LEVELS,
+  resolveAppliedEffort,
+} from 'src/utils/effort.js'
+import { modelSupportsAdaptiveThinking } from 'src/utils/thinking.js'
+import { modelSupportsAutoMode } from 'src/utils/betas.js'
+import { ensureModelStringsInitialized } from 'src/utils/model/modelStrings.js'
+import {
+  getSessionId,
+  setMainLoopModelOverride,
+  setMainThreadAgentType,
+  switchSession,
+  isSessionPersistenceDisabled,
+  getIsRemoteMode,
+  getFlagSettingsInline,
+  setFlagSettingsInline,
+  getMainThreadAgentType,
+  getAllowedChannels,
+  setAllowedChannels,
+  type ChannelEntry,
+} from 'src/bootstrap/state.js'
+import { runWithWorkload, WORKLOAD_CRON } from 'src/utils/workloadContext.js'
+import type { UUID } from 'crypto'
+import { randomUUID } from 'crypto'
+import type { ContentBlockParam } from '@anthropic-ai/sdk/resources/messages.mjs'
+import type { AppState } from 'src/state/AppStateStore.js'
+import {
+  fileHistoryRewind,
+  fileHistoryCanRestore,
+  fileHistoryEnabled,
+  fileHistoryGetDiffStats,
+} from 'src/utils/fileHistory.js'
+import {
+  restoreAgentFromSession,
+  restoreSessionStateFromLog,
+} from 'src/utils/sessionRestore.js'
+import { SandboxManager } from 'src/utils/sandbox/sandbox-adapter.js'
+import {
+  headlessProfilerStartTurn,
+  headlessProfilerCheckpoint,
+  logHeadlessProfilerTurn,
+} from 'src/utils/headlessProfiler.js'
+import {
+  startQueryProfile,
+  logQueryProfileReport,
+} from 'src/utils/queryProfiler.js'
+import { asSessionId } from 'src/types/ids.js'
+import { jsonStringify } from '../utils/slowOperations.js'
+import { skillChangeDetector } from '../utils/skills/skillChangeDetector.js'
+import { getCommands, clearCommandsCache } from '../commands.js'
+import {
+  isBareMode,
+  isEnvTruthy,
+  isEnvDefinedFalsy,
+} from '../utils/envUtils.js'
+import { installPluginsForHeadless } from '../utils/plugins/headlessPluginInstall.js'
+import { refreshActivePlugins } from '../utils/plugins/refresh.js'
+import { loadAllPluginsCacheOnly } from '../utils/plugins/pluginLoader.js'
+import {
+  isTeamLead,
+  hasActiveInProcessTeammates,
+  hasWorkingInProcessTeammates,
+  waitForTeammatesToBecomeIdle,
+} from '../utils/teammate.js'
+import {
+  readUnreadMessages,
+  markMessagesAsRead,
+  isShutdownApproved,
+} from '../utils/teammateMailbox.js'
+import { removeTeammateFromTeamFile } from '../utils/swarm/teamHelpers.js'
+import { unassignTeammateTasks } from '../utils/tasks.js'
+import { getRunningTasks } from '../utils/task/framework.js'
+import { isBackgroundTask } from '../tasks/types.js'
+import { stopTask } from '../tasks/stopTask.js'
+import { drainSdkEvents } from '../utils/sdkEventQueue.js'
+import { initializeGrowthBook } from '../services/analytics/growthbook.js'
+import { errorMessage, toError } from '../utils/errors.js'
+import { sleep } from '../utils/sleep.js'
+import { isExtractModeActive } from '../memdir/paths.js'
+
// Dead code elimination: conditional imports
//
// Each module below is loaded only when its build-time feature flag is on.
// The literal `feature('X') ? require(...) : null` shape is load-bearing:
// the bundler evaluates `feature()` at build time and strips the whole
// require() for disabled features, so these modules never ship in builds
// that don't enable them. Do NOT refactor into a helper — the bundler
// needs the literal ternary to perform the elimination. Call sites must
// null-check each module before use.
/* eslint-disable @typescript-eslint/no-require-imports */
// Coordinator-mode entry points (COORDINATOR_MODE builds only).
const coordinatorModeModule = feature('COORDINATOR_MODE')
  ? (require('../coordinator/coordinatorMode.js') as typeof import('../coordinator/coordinatorMode.js'))
  : null
// Proactive/Kairos activation — used below as a fallback activation path
// when CLAUDE_CODE_PROACTIVE is set after argv parsing.
const proactiveModule =
  feature('PROACTIVE') || feature('KAIROS')
    ? (require('../proactive/index.js') as typeof import('../proactive/index.js'))
    : null
// Cron scheduling trio (AGENT_TRIGGERS builds only): scheduler, jitter
// config, and the ScheduleCronTool prompt gate.
const cronSchedulerModule = feature('AGENT_TRIGGERS')
  ? (require('../utils/cronScheduler.js') as typeof import('../utils/cronScheduler.js'))
  : null
const cronJitterConfigModule = feature('AGENT_TRIGGERS')
  ? (require('../utils/cronJitterConfig.js') as typeof import('../utils/cronJitterConfig.js'))
  : null
const cronGate = feature('AGENT_TRIGGERS')
  ? (require('../tools/ScheduleCronTool/prompt.js') as typeof import('../tools/ScheduleCronTool/prompt.js'))
  : null
// Memory extraction service (EXTRACT_MEMORIES builds only).
const extractMemoriesModule = feature('EXTRACT_MEMORIES')
  ? (require('../services/extractMemories/extractMemories.js') as typeof import('../services/extractMemories/extractMemories.js'))
  : null
/* eslint-enable @typescript-eslint/no-require-imports */
+
// Prompt injected in non-interactive (headless) mode to make a team lead
// shut its teammates down before producing the final response — presumably
// consumed by the teammate shutdown flow later in this file (usage not
// visible here; TODO confirm). The template text is model-facing runtime
// output: do not reword or reformat it.
const SHUTDOWN_TEAM_PROMPT = `<system-reminder>
You are running in non-interactive mode and cannot return a response to the user until your team is shut down.

You MUST shut down your team before preparing your final response:
1. Use requestShutdown to ask each team member to shut down gracefully
2. Wait for shutdown approvals
3. Use the cleanup operation to clean up the team
4. Only then provide your final response to the user

The user cannot receive your response until the team is completely shut down.
</system-reminder>

Shut down your team and prepare your final response for the user.`
+
+// Track message UUIDs received during the current session runtime
+const MAX_RECEIVED_UUIDS = 10_000
+const receivedMessageUuids = new Set<UUID>()
+const receivedMessageUuidsOrder: UUID[] = []
+
+function trackReceivedMessageUuid(uuid: UUID): boolean {
+  if (receivedMessageUuids.has(uuid)) {
+    return false // duplicate
+  }
+  receivedMessageUuids.add(uuid)
+  receivedMessageUuidsOrder.push(uuid)
+  // Evict oldest entries when at capacity
+  if (receivedMessageUuidsOrder.length > MAX_RECEIVED_UUIDS) {
+    const toEvict = receivedMessageUuidsOrder.splice(
+      0,
+      receivedMessageUuidsOrder.length - MAX_RECEIVED_UUIDS,
+    )
+    for (const old of toEvict) {
+      receivedMessageUuids.delete(old)
+    }
+  }
+  return true // new UUID
+}
+
+type PromptValue = string | ContentBlockParam[]
+
+function toBlocks(v: PromptValue): ContentBlockParam[] {
+  return typeof v === 'string' ? [{ type: 'text', text: v }] : v
+}
+
+/**
+ * Join prompt values from multiple queued commands into one. Strings are
+ * newline-joined; if any value is a block array, all values are normalized
+ * to blocks and concatenated.
+ */
+export function joinPromptValues(values: PromptValue[]): PromptValue {
+  if (values.length === 1) return values[0]!
+  if (values.every(v => typeof v === 'string')) {
+    return values.join('\n')
+  }
+  return values.flatMap(toBlocks)
+}
+
+/**
+ * Whether `next` can be batched into the same ask() call as `head`. Only
+ * prompt-mode commands batch, and only when the workload tag matches (so the
+ * combined turn is attributed correctly) and the isMeta flag matches (so a
+ * proactive tick can't merge into a user prompt and lose its hidden-in-
+ * transcript marking when the head is spread over the merged command).
+ */
+export function canBatchWith(
+  head: QueuedCommand,
+  next: QueuedCommand | undefined,
+): boolean {
+  return (
+    next !== undefined &&
+    next.mode === 'prompt' &&
+    next.workload === head.workload &&
+    next.isMeta === head.isMeta
+  )
+}
+
+export async function runHeadless(
+  inputPrompt: string | AsyncIterable<string>,
+  getAppState: () => AppState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+  commands: Command[],
+  tools: Tools,
+  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
+  agents: AgentDefinition[],
+  options: {
+    continue: boolean | undefined
+    resume: string | boolean | undefined
+    resumeSessionAt: string | undefined
+    verbose: boolean | undefined
+    outputFormat: string | undefined
+    jsonSchema: Record<string, unknown> | undefined
+    permissionPromptToolName: string | undefined
+    allowedTools: string[] | undefined
+    thinkingConfig: ThinkingConfig | undefined
+    maxTurns: number | undefined
+    maxBudgetUsd: number | undefined
+    taskBudget: { total: number } | undefined
+    systemPrompt: string | undefined
+    appendSystemPrompt: string | undefined
+    userSpecifiedModel: string | undefined
+    fallbackModel: string | undefined
+    teleport: string | true | null | undefined
+    sdkUrl: string | undefined
+    replayUserMessages: boolean | undefined
+    includePartialMessages: boolean | undefined
+    forkSession: boolean | undefined
+    rewindFiles: string | undefined
+    enableAuthStatus: boolean | undefined
+    agent: string | undefined
+    workload: string | undefined
+    setupTrigger?: 'init' | 'maintenance' | undefined
+    sessionStartHooksPromise?: ReturnType<typeof processSessionStartHooks>
+    setSDKStatus?: (status: SDKStatus) => void
+  },
+): Promise<void> {
+  if (
+    process.env.USER_TYPE === 'ant' &&
+    isEnvTruthy(process.env.CLAUDE_CODE_EXIT_AFTER_FIRST_RENDER)
+  ) {
+    process.stderr.write(
+      `\nStartup time: ${Math.round(process.uptime() * 1000)}ms\n`,
+    )
+    // eslint-disable-next-line custom-rules/no-process-exit
+    process.exit(0)
+  }
+
+  // Fire user settings download now so it overlaps with the MCP/tool setup
+  // below. Managed settings already started in main.tsx preAction; this gives
+  // user settings a similar head start. The cached promise is joined in
+  // installPluginsAndApplyMcpInBackground before plugin install reads
+  // enabledPlugins.
+  if (
+    feature('DOWNLOAD_USER_SETTINGS') &&
+    (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) || getIsRemoteMode())
+  ) {
+    void downloadUserSettings()
+  }
+
+  // In headless mode there is no React tree, so the useSettingsChange hook
+  // never runs. Subscribe directly so that settings changes (including
+  // managed-settings / policy updates) are fully applied.
+  settingsChangeDetector.subscribe(source => {
+    applySettingsChange(source, setAppState)
+
+    // In headless mode, also sync the denormalized fastMode field from
+    // settings. The TUI manages fastMode via the UI so it skips this.
+    if (isFastModeEnabled()) {
+      setAppState(prev => {
+        const s = prev.settings as Record<string, unknown>
+        const fastMode = s.fastMode === true && !s.fastModePerSessionOptIn
+        return { ...prev, fastMode }
+      })
+    }
+  })
+
+  // Proactive activation is now handled in main.tsx before getTools() so
+  // SleepTool passes isEnabled() filtering. This fallback covers the case
+  // where CLAUDE_CODE_PROACTIVE is set but main.tsx's check didn't fire
+  // (e.g. env was injected by the SDK transport after argv parsing).
+  if (
+    (feature('PROACTIVE') || feature('KAIROS')) &&
+    proactiveModule &&
+    !proactiveModule.isProactiveActive() &&
+    isEnvTruthy(process.env.CLAUDE_CODE_PROACTIVE)
+  ) {
+    proactiveModule.activateProactive('command')
+  }
+
+  // Periodically force a full GC to keep memory usage in check
+  if (typeof Bun !== 'undefined') {
+    const gcTimer = setInterval(Bun.gc, 1000)
+    gcTimer.unref()
+  }
+
+  // Start headless profiler for first turn
+  headlessProfilerStartTurn()
+  headlessProfilerCheckpoint('runHeadless_entry')
+
+  // Check Grove requirements for non-interactive consumer subscribers
+  if (await isQualifiedForGrove()) {
+    await checkGroveForNonInteractive()
+  }
+  headlessProfilerCheckpoint('after_grove_check')
+
+  // Initialize GrowthBook so feature flags take effect in headless mode.
+  // Without this, the disk cache is empty and all flags fall back to defaults.
+  void initializeGrowthBook()
+
+  if (options.resumeSessionAt && !options.resume) {
+    process.stderr.write(`Error: --resume-session-at requires --resume\n`)
+    gracefulShutdownSync(1)
+    return
+  }
+
+  if (options.rewindFiles && !options.resume) {
+    process.stderr.write(`Error: --rewind-files requires --resume\n`)
+    gracefulShutdownSync(1)
+    return
+  }
+
+  if (options.rewindFiles && inputPrompt) {
+    process.stderr.write(
+      `Error: --rewind-files is a standalone operation and cannot be used with a prompt\n`,
+    )
+    gracefulShutdownSync(1)
+    return
+  }
+
+  const structuredIO = getStructuredIO(inputPrompt, options)
+
+  // When emitting NDJSON for SDK clients, any stray write to stdout (debug
+  // prints, dependency console.log, library banners) breaks the client's
+  // line-by-line JSON parser. Install a guard that diverts non-JSON lines to
+  // stderr so the stream stays clean. Must run before the first
+  // structuredIO.write below.
+  if (options.outputFormat === 'stream-json') {
+    installStreamJsonStdoutGuard()
+  }
+
+  // #34044: if user explicitly set sandbox.enabled=true but deps are missing,
+  // isSandboxingEnabled() returns false silently. Surface the reason so users
+  // know their security config isn't being enforced.
+  const sandboxUnavailableReason = SandboxManager.getSandboxUnavailableReason()
+  if (sandboxUnavailableReason) {
+    if (SandboxManager.isSandboxRequired()) {
+      process.stderr.write(
+        `\nError: sandbox required but unavailable: ${sandboxUnavailableReason}\n` +
+          `  sandbox.failIfUnavailable is set — refusing to start without a working sandbox.\n\n`,
+      )
+      gracefulShutdownSync(1)
+      return
+    }
+    process.stderr.write(
+      `\n⚠ Sandbox disabled: ${sandboxUnavailableReason}\n` +
+        `  Commands will run WITHOUT sandboxing. Network and filesystem restrictions will NOT be enforced.\n\n`,
+    )
+  } else if (SandboxManager.isSandboxingEnabled()) {
+    // Initialize sandbox with a callback that forwards network permission
+    // requests to the SDK host via the can_use_tool control_request protocol.
+    // This must happen after structuredIO is created so we can send requests.
+    try {
+      await SandboxManager.initialize(structuredIO.createSandboxAskCallback())
+    } catch (err) {
+      process.stderr.write(`\n❌ Sandbox Error: ${errorMessage(err)}\n`)
+      gracefulShutdownSync(1, 'other')
+      return
+    }
+  }
+
+  if (options.outputFormat === 'stream-json' && options.verbose) {
+    registerHookEventHandler(event => {
+      const message: StdoutMessage = (() => {
+        switch (event.type) {
+          case 'started':
+            return {
+              type: 'system' as const,
+              subtype: 'hook_started' as const,
+              hook_id: event.hookId,
+              hook_name: event.hookName,
+              hook_event: event.hookEvent,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            }
+          case 'progress':
+            return {
+              type: 'system' as const,
+              subtype: 'hook_progress' as const,
+              hook_id: event.hookId,
+              hook_name: event.hookName,
+              hook_event: event.hookEvent,
+              stdout: event.stdout,
+              stderr: event.stderr,
+              output: event.output,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            }
+          case 'response':
+            return {
+              type: 'system' as const,
+              subtype: 'hook_response' as const,
+              hook_id: event.hookId,
+              hook_name: event.hookName,
+              hook_event: event.hookEvent,
+              output: event.output,
+              stdout: event.stdout,
+              stderr: event.stderr,
+              exit_code: event.exitCode,
+              outcome: event.outcome,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            }
+        }
+      })()
+      void structuredIO.write(message)
+    })
+  }
+
+  if (options.setupTrigger) {
+    await processSetupHooks(options.setupTrigger)
+  }
+
+  headlessProfilerCheckpoint('before_loadInitialMessages')
+  const appState = getAppState()
+  const {
+    messages: initialMessages,
+    turnInterruptionState,
+    agentSetting: resumedAgentSetting,
+  } = await loadInitialMessages(setAppState, {
+    continue: options.continue,
+    teleport: options.teleport,
+    resume: options.resume,
+    resumeSessionAt: options.resumeSessionAt,
+    forkSession: options.forkSession,
+    outputFormat: options.outputFormat,
+    sessionStartHooksPromise: options.sessionStartHooksPromise,
+    restoredWorkerState: structuredIO.restoredWorkerState,
+  })
+
+  // SessionStart hooks can emit initialUserMessage — the first user turn for
+  // headless orchestrator sessions where stdin is empty and additionalContext
+  // alone (an attachment, not a turn) would leave the REPL with nothing to
+  // respond to. The hook promise is awaited inside loadInitialMessages, so the
+  // module-level pending value is set by the time we get here.
+  const hookInitialUserMessage = takeInitialUserMessage()
+  if (hookInitialUserMessage) {
+    structuredIO.prependUserMessage(hookInitialUserMessage)
+  }
+
+  // Restore agent setting from the resumed session (if not overridden by current --agent flag
+  // or settings-based agent, which would already have set mainThreadAgentType in main.tsx)
+  if (!options.agent && !getMainThreadAgentType() && resumedAgentSetting) {
+    const { agentDefinition: restoredAgent } = restoreAgentFromSession(
+      resumedAgentSetting,
+      undefined,
+      { activeAgents: agents, allAgents: agents },
+    )
+    if (restoredAgent) {
+      setAppState(prev => ({ ...prev, agent: restoredAgent.agentType }))
+      // Apply the agent's system prompt for non-built-in agents (mirrors main.tsx initial --agent path)
+      if (!options.systemPrompt && !isBuiltInAgent(restoredAgent)) {
+        const agentSystemPrompt = restoredAgent.getSystemPrompt()
+        if (agentSystemPrompt) {
+          options.systemPrompt = agentSystemPrompt
+        }
+      }
+      // Re-persist agent setting so future resumes maintain the agent
+      saveAgentSetting(restoredAgent.agentType)
+    }
+  }
+
+  // gracefulShutdownSync schedules an async shutdown and sets process.exitCode.
+  // If a loadInitialMessages error path triggered it, bail early to avoid
+  // unnecessary work while the process winds down.
+  if (initialMessages.length === 0 && process.exitCode !== undefined) {
+    return
+  }
+
+  // Handle --rewind-files: restore filesystem and exit immediately
+  if (options.rewindFiles) {
+    // File history snapshots are only created for user messages,
+    // so we require the target to be a user message
+    const targetMessage = initialMessages.find(
+      m => m.uuid === options.rewindFiles,
+    )
+
+    if (!targetMessage || targetMessage.type !== 'user') {
+      process.stderr.write(
+        `Error: --rewind-files requires a user message UUID, but ${options.rewindFiles} is not a user message in this session\n`,
+      )
+      gracefulShutdownSync(1)
+      return
+    }
+
+    const currentAppState = getAppState()
+    const result = await handleRewindFiles(
+      options.rewindFiles as UUID,
+      currentAppState,
+      setAppState,
+      false,
+    )
+    if (!result.canRewind) {
+      process.stderr.write(`Error: ${result.error || 'Unexpected error'}\n`)
+      gracefulShutdownSync(1)
+      return
+    }
+
+    // Rewind complete - exit successfully
+    process.stdout.write(
+      `Files rewound to state at message ${options.rewindFiles}\n`,
+    )
+    gracefulShutdownSync(0)
+    return
+  }
+
+  // Check if we need input prompt - skip if we're resuming with a valid session ID/JSONL file or using SDK URL
+  const hasValidResumeSessionId =
+    typeof options.resume === 'string' &&
+    (Boolean(validateUuid(options.resume)) || options.resume.endsWith('.jsonl'))
+  const isUsingSdkUrl = Boolean(options.sdkUrl)
+
+  if (!inputPrompt && !hasValidResumeSessionId && !isUsingSdkUrl) {
+    process.stderr.write(
+      `Error: Input must be provided either through stdin or as a prompt argument when using --print\n`,
+    )
+    gracefulShutdownSync(1)
+    return
+  }
+
+  if (options.outputFormat === 'stream-json' && !options.verbose) {
+    process.stderr.write(
+      'Error: When using --print, --output-format=stream-json requires --verbose\n',
+    )
+    gracefulShutdownSync(1)
+    return
+  }
+
+  // Filter out MCP tools that are in the deny list
+  const allowedMcpTools = filterToolsByDenyRules(
+    appState.mcp.tools,
+    appState.toolPermissionContext,
+  )
+  let filteredTools = [...tools, ...allowedMcpTools]
+
+  // When using SDK URL, always use stdio permission prompting to delegate to the SDK
+  const effectivePermissionPromptToolName = options.sdkUrl
+    ? 'stdio'
+    : options.permissionPromptToolName
+
+  // Callback for when a permission prompt is shown
+  const onPermissionPrompt = (details: RequiresActionDetails) => {
+    if (feature('COMMIT_ATTRIBUTION')) {
+      setAppState(prev => ({
+        ...prev,
+        attribution: {
+          ...prev.attribution,
+          permissionPromptCount: prev.attribution.permissionPromptCount + 1,
+        },
+      }))
+    }
+    notifySessionStateChanged('requires_action', details)
+  }
+
+  const canUseTool = getCanUseToolFn(
+    effectivePermissionPromptToolName,
+    structuredIO,
+    () => getAppState().mcp.tools,
+    onPermissionPrompt,
+  )
+  if (options.permissionPromptToolName) {
+    // Remove the permission prompt tool from the list of available tools.
+    filteredTools = filteredTools.filter(
+      tool => !toolMatchesName(tool, options.permissionPromptToolName!),
+    )
+  }
+
+  // Install errors handlers to gracefully handle broken pipes (e.g., when parent process dies)
+  registerProcessOutputErrorHandlers()
+
+  headlessProfilerCheckpoint('after_loadInitialMessages')
+
+  // Ensure model strings are initialized before generating model options.
+  // For Bedrock users, this waits for the profile fetch to get correct region strings.
+  await ensureModelStringsInitialized()
+  headlessProfilerCheckpoint('after_modelStrings')
+
+  // UDS inbox store registration is deferred until after `run` is defined
+  // so we can pass `run` as the onEnqueue callback (see below).
+
+  // Only `json` + `verbose` needs the full array (jsonStringify(messages) below).
+  // For stream-json (SDK/CCR) and default text output, only the last message is
+  // read for the exit code / final result. Avoid accumulating every message in
+  // memory for the entire session.
+  const needsFullArray = options.outputFormat === 'json' && options.verbose
+  const messages: SDKMessage[] = []
+  let lastMessage: SDKMessage | undefined
+  // Streamlined mode transforms messages when CLAUDE_CODE_STREAMLINED_OUTPUT=true and using stream-json
+  // Build flag gates this out of external builds; env var is the runtime opt-in for ant builds
+  const transformToStreamlined =
+    feature('STREAMLINED_OUTPUT') &&
+    isEnvTruthy(process.env.CLAUDE_CODE_STREAMLINED_OUTPUT) &&
+    options.outputFormat === 'stream-json'
+      ? createStreamlinedTransformer()
+      : null
+
+  headlessProfilerCheckpoint('before_runHeadlessStreaming')
+  for await (const message of runHeadlessStreaming(
+    structuredIO,
+    appState.mcp.clients,
+    [...commands, ...appState.mcp.commands],
+    filteredTools,
+    initialMessages,
+    canUseTool,
+    sdkMcpConfigs,
+    getAppState,
+    setAppState,
+    agents,
+    options,
+    turnInterruptionState,
+  )) {
+    if (transformToStreamlined) {
+      // Streamlined mode: transform messages and stream immediately
+      const transformed = transformToStreamlined(message)
+      if (transformed) {
+        await structuredIO.write(transformed)
+      }
+    } else if (options.outputFormat === 'stream-json' && options.verbose) {
+      await structuredIO.write(message)
+    }
+    // Should not be getting control messages or stream events in non-stream mode.
+    // Also filter out streamlined types since they're only produced by the transformer.
+    // SDK-only system events are excluded so lastMessage stays at the result
+    // (session_state_changed(idle) and any late task_notification drain after
+    // result in the finally block).
+    if (
+      message.type !== 'control_response' &&
+      message.type !== 'control_request' &&
+      message.type !== 'control_cancel_request' &&
+      !(
+        message.type === 'system' &&
+        (message.subtype === 'session_state_changed' ||
+          message.subtype === 'task_notification' ||
+          message.subtype === 'task_started' ||
+          message.subtype === 'task_progress' ||
+          message.subtype === 'post_turn_summary')
+      ) &&
+      message.type !== 'stream_event' &&
+      message.type !== 'keep_alive' &&
+      message.type !== 'streamlined_text' &&
+      message.type !== 'streamlined_tool_use_summary' &&
+      message.type !== 'prompt_suggestion'
+    ) {
+      if (needsFullArray) {
+        messages.push(message)
+      }
+      lastMessage = message
+    }
+  }
+
+  // Emit the final result in the caller-requested format. stream-json has
+  // already written everything inline in the loop above, so it is a no-op here.
+  switch (options.outputFormat) {
+    case 'json':
+      if (!lastMessage || lastMessage.type !== 'result') {
+        throw new Error('No messages returned')
+      }
+      if (options.verbose) {
+        // Verbose json dumps the full transcript (accumulated in `messages`
+        // only when needsFullArray was true).
+        writeToStdout(jsonStringify(messages) + '\n')
+        break
+      }
+      writeToStdout(jsonStringify(lastMessage) + '\n')
+      break
+    case 'stream-json':
+      // already logged above
+      break
+    default:
+      // Plain-text output: print the result text, or a short error line.
+      if (!lastMessage || lastMessage.type !== 'result') {
+        throw new Error('No messages returned')
+      }
+      switch (lastMessage.subtype) {
+        case 'success':
+          // Ensure exactly one trailing newline on the result text.
+          writeToStdout(
+            lastMessage.result.endsWith('\n')
+              ? lastMessage.result
+              : lastMessage.result + '\n',
+          )
+          break
+        // NOTE(review): the error branches below write to stdout without a
+        // trailing newline, unlike the success branch — confirm intentional.
+        case 'error_during_execution':
+          writeToStdout(`Execution error`)
+          break
+        case 'error_max_turns':
+          writeToStdout(`Error: Reached max turns (${options.maxTurns})`)
+          break
+        case 'error_max_budget_usd':
+          writeToStdout(`Error: Exceeded USD budget (${options.maxBudgetUsd})`)
+          break
+        case 'error_max_structured_output_retries':
+          writeToStdout(
+            `Error: Failed to provide valid structured output after maximum retries`,
+          )
+      }
+  }
+
+  // Log headless latency metrics for the final turn
+  logHeadlessProfilerTurn()
+
+  // Drain any in-flight memory extraction before shutdown. The response is
+  // already flushed above, so this adds no user-visible latency — it just
+  // delays process exit so gracefulShutdownSync's 5s failsafe doesn't kill
+  // the forked agent mid-flight. Gated by isExtractModeActive so the
+  // tengu_slate_thimble flag controls non-interactive extraction end-to-end.
+  if (feature('EXTRACT_MEMORIES') && isExtractModeActive()) {
+    await extractMemoriesModule!.drainPendingExtraction()
+  }
+
+  // Exit code 1 when the final result message is an error; 0 otherwise.
+  gracefulShutdownSync(
+    lastMessage?.type === 'result' && lastMessage?.is_error ? 1 : 0,
+  )
+}
+
+function runHeadlessStreaming(
+  structuredIO: StructuredIO,
+  mcpClients: MCPServerConnection[],
+  commands: Command[],
+  tools: Tools,
+  initialMessages: Message[],
+  canUseTool: CanUseToolFn,
+  sdkMcpConfigs: Record<string, McpSdkServerConfig>,
+  getAppState: () => AppState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+  agents: AgentDefinition[],
+  options: {
+    verbose: boolean | undefined
+    jsonSchema: Record<string, unknown> | undefined
+    permissionPromptToolName: string | undefined
+    allowedTools: string[] | undefined
+    thinkingConfig: ThinkingConfig | undefined
+    maxTurns: number | undefined
+    maxBudgetUsd: number | undefined
+    taskBudget: { total: number } | undefined
+    systemPrompt: string | undefined
+    appendSystemPrompt: string | undefined
+    userSpecifiedModel: string | undefined
+    fallbackModel: string | undefined
+    replayUserMessages?: boolean | undefined
+    includePartialMessages?: boolean | undefined
+    enableAuthStatus?: boolean | undefined
+    agent?: string | undefined
+    setSDKStatus?: (status: SDKStatus) => void
+    promptSuggestions?: boolean | undefined
+    workload?: string | undefined
+  },
+  turnInterruptionState?: TurnInterruptionState,
+): AsyncIterable<StdoutMessage> {
+  let running = false
+  let runPhase:
+    | 'draining_commands'
+    | 'waiting_for_agents'
+    | 'finally_flush'
+    | 'finally_post_flush'
+    | undefined
+  let inputClosed = false
+  let shutdownPromptInjected = false
+  let heldBackResult: StdoutMessage | null = null
+  let abortController: AbortController | undefined
+  // Same queue sendRequest() enqueues to — one FIFO for everything.
+  const output = structuredIO.outbound
+
+  // Ctrl+C in -p mode: cancel the in-flight query via its AbortController,
+  // then hand off to gracefulShutdown, which persists session state and
+  // flushes analytics; a failsafe timer force-exits if cleanup hangs.
+  const sigintHandler = () => {
+    logForDiagnosticsNoPII('info', 'shutdown_signal', { signal: 'SIGINT' })
+    const inflight = abortController
+    if (inflight !== undefined && !inflight.signal.aborted) {
+      inflight.abort()
+    }
+    void gracefulShutdown(0)
+  }
+  process.on('SIGINT', sigintHandler)
+
+  // Dump run()'s state at SIGTERM so a stuck session's healthsweep can name
+  // the do/while(waitingForAgents) poll without reading the transcript.
+  registerCleanup(async () => {
+    // Per-type counts of currently running background tasks.
+    const bg: Record<string, number> = {}
+    for (const t of getRunningTasks(getAppState())) {
+      if (isBackgroundTask(t)) bg[t.type] = (bg[t.type] ?? 0) + 1
+    }
+    // No PII: only phase/status enums and per-type counts are logged.
+    logForDiagnosticsNoPII('info', 'run_state_at_shutdown', {
+      run_active: running,
+      run_phase: runPhase,
+      worker_status: getSessionState(),
+      internal_events_pending: structuredIO.internalEventsPending,
+      bg_tasks: bg,
+    })
+  })
+
+  // Wire the central onChangeAppState mode-diff hook to the SDK output stream.
+  // This fires whenever ANY code path mutates toolPermissionContext.mode —
+  // Shift+Tab, ExitPlanMode dialog, /plan slash command, rewind, bridge
+  // set_permission_mode, the query loop, stop_task — rather than the two
+  // paths that previously went through a bespoke wrapper.
+  // The wrapper's body was fully redundant (it enqueued here AND called
+  // notifySessionMetadataChanged, both of which onChangeAppState now covers);
+  // keeping it would double-emit status messages.
+  setPermissionModeChangedListener(newMode => {
+    // Only surface modes the SDK exposes; 'auto' counts only while the
+    // TRANSCRIPT_CLASSIFIER feature is enabled.
+    const sdkExposedModes = [
+      'default',
+      'acceptEdits',
+      'bypassPermissions',
+      'plan',
+      'dontAsk',
+    ]
+    const isSdkExposed =
+      sdkExposedModes.includes(newMode) ||
+      (feature('TRANSCRIPT_CLASSIFIER') && newMode === 'auto')
+    if (!isSdkExposed) {
+      return
+    }
+    output.enqueue({
+      type: 'system',
+      subtype: 'status',
+      status: null,
+      permissionMode: newMode as PermissionMode,
+      uuid: randomUUID(),
+      session_id: getSessionId(),
+    })
+  })
+
+  // Prompt suggestion tracking (push model)
+  const suggestionState: {
+    abortController: AbortController | null
+    inflightPromise: Promise<void> | null
+    lastEmitted: {
+      text: string
+      emittedAt: number
+      promptId: PromptVariant
+      generationRequestId: string | null
+    } | null
+    pendingSuggestion: {
+      type: 'prompt_suggestion'
+      suggestion: string
+      uuid: UUID
+      session_id: string
+    } | null
+    pendingLastEmittedEntry: {
+      text: string
+      promptId: PromptVariant
+      generationRequestId: string | null
+    } | null
+  } = {
+    abortController: null,
+    inflightPromise: null,
+    lastEmitted: null,
+    pendingSuggestion: null,
+    pendingLastEmittedEntry: null,
+  }
+
+  // Set up AWS auth status listener if enabled
+  let unsubscribeAuthStatus: (() => void) | undefined
+  if (options.enableAuthStatus) {
+    const authStatusManager = AwsAuthStatusManager.getInstance()
+    // Mirror every auth status change onto the SDK output stream. The
+    // unsubscribe function is kept so teardown can detach the listener —
+    // presumably invoked in this function's cleanup path (TODO confirm).
+    unsubscribeAuthStatus = authStatusManager.subscribe(status => {
+      output.enqueue({
+        type: 'auth_status',
+        isAuthenticating: status.isAuthenticating,
+        output: status.output,
+        error: status.error,
+        uuid: randomUUID(),
+        session_id: getSessionId(),
+      })
+    })
+  }
+
+  // Set up rate limit status listener to emit SDKRateLimitEvent for all status changes.
+  // Emitting for all statuses (including 'allowed') ensures consumers can clear warnings
+  // when rate limits reset. The upstream emitStatusChange already deduplicates via isEqual.
+  const rateLimitListener = (limits: ClaudeAILimits) => {
+    // toSDKRateLimitInfo yields a falsy value when the limits carry nothing
+    // worth surfacing; skip the event in that case.
+    const rateLimitInfo = toSDKRateLimitInfo(limits)
+    if (rateLimitInfo) {
+      output.enqueue({
+        type: 'rate_limit_event',
+        rate_limit_info: rateLimitInfo,
+        uuid: randomUUID(),
+        session_id: getSessionId(),
+      })
+    }
+  }
+  statusListeners.add(rateLimitListener)
+
+  // Messages for internal tracking, directly mutated by ask(). These messages
+  // include Assistant, User, Attachment, and Progress messages.
+  // TODO: Clean up this code to avoid passing around a mutable array.
+  const mutableMessages: Message[] = initialMessages
+
+  // Seed the readFileState cache from the transcript (content the model saw,
+  // with message timestamps) so getChangedFiles can detect external edits.
+  // This cache instance must persist across ask() calls, since the edit tool
+  // relies on this as a global state.
+  let readFileState = extractReadFilesFromMessages(
+    initialMessages,
+    cwd(),
+    READ_FILE_STATE_CACHE_SIZE,
+  )
+
+  // Client-supplied readFileState seeds (via seed_read_state control request).
+  // The stdin IIFE runs concurrently with ask() — a seed arriving mid-turn
+  // would be lost to ask()'s clone-then-replace (QueryEngine.ts finally block)
+  // if written directly into readFileState. Instead, seeds land here, merge
+  // into getReadFileCache's view (readFileState-wins-ties: seeds fill gaps),
+  // and are re-applied then CLEARED in setReadFileCache. One-shot: each seed
+  // survives exactly one clone-replace cycle, then becomes a regular
+  // readFileState entry subject to compact's clear like everything else.
+  const pendingSeeds = createFileStateCacheWithSizeLimit(
+    READ_FILE_STATE_CACHE_SIZE,
+  )
+
+  // Auto-resume interrupted turns on restart so CC continues from where it
+  // left off without requiring the SDK to re-send the prompt.
+  const resumeInterruptedTurnEnv =
+    process.env.CLAUDE_CODE_RESUME_INTERRUPTED_TURN
+  if (
+    turnInterruptionState &&
+    turnInterruptionState.kind !== 'none' &&
+    resumeInterruptedTurnEnv
+  ) {
+    logForDebugging(
+      `[print.ts] Auto-resuming interrupted turn (kind: ${turnInterruptionState.kind})`,
+    )
+
+    // Remove the interrupted message and its sentinel, then re-enqueue so
+    // the model sees it exactly once. For mid-turn interruptions, the
+    // deserialization layer transforms them into interrupted_prompt by
+    // appending a synthetic "Continue from where you left off." message.
+    removeInterruptedMessage(mutableMessages, turnInterruptionState.message)
+    enqueue({
+      mode: 'prompt',
+      value: turnInterruptionState.message.message.content,
+      uuid: randomUUID(),
+    })
+  }
+
+  const modelOptions = getModelOptions()
+  const modelInfos = modelOptions.map(option => {
+    const modelId = option.value === null ? 'default' : option.value
+    const resolvedModel =
+      modelId === 'default'
+        ? getDefaultMainLoopModel()
+        : parseUserSpecifiedModel(modelId)
+    const hasEffort = modelSupportsEffort(resolvedModel)
+    const hasAdaptiveThinking = modelSupportsAdaptiveThinking(resolvedModel)
+    const hasFastMode = isFastModeSupportedByModel(option.value)
+    const hasAutoMode = modelSupportsAutoMode(resolvedModel)
+    return {
+      value: modelId,
+      displayName: option.label,
+      description: option.description,
+      ...(hasEffort && {
+        supportsEffort: true,
+        supportedEffortLevels: modelSupportsMaxEffort(resolvedModel)
+          ? [...EFFORT_LEVELS]
+          : EFFORT_LEVELS.filter(l => l !== 'max'),
+      }),
+      ...(hasAdaptiveThinking && { supportsAdaptiveThinking: true }),
+      ...(hasFastMode && { supportsFastMode: true }),
+      ...(hasAutoMode && { supportsAutoMode: true }),
+    }
+  })
+  let activeUserSpecifiedModel = options.userSpecifiedModel
+
+  /**
+   * Append model-switch breadcrumb messages to the transcript and replay
+   * the stdout-bearing ones onto the SDK output stream.
+   */
+  function injectModelSwitchBreadcrumbs(
+    modelArg: string,
+    resolvedModel: string,
+  ): void {
+    const crumbs = createModelSwitchBreadcrumbs(
+      modelArg,
+      modelDisplayString(resolvedModel),
+    )
+    mutableMessages.push(...crumbs)
+    const stdoutMarker = `<${LOCAL_COMMAND_STDOUT_TAG}>`
+    for (const crumb of crumbs) {
+      const content = crumb.message.content
+      if (typeof content !== 'string' || !content.includes(stdoutMarker)) {
+        continue
+      }
+      output.enqueue({
+        type: 'user',
+        message: crumb.message,
+        session_id: getSessionId(),
+        parent_tool_use_id: null,
+        uuid: crumb.uuid,
+        timestamp: crumb.timestamp,
+        isReplay: true,
+      } satisfies SDKUserMessageReplay)
+    }
+  }
+
+  // Cache SDK MCP clients to avoid reconnecting on each run
+  let sdkClients: MCPServerConnection[] = []
+  let sdkTools: Tools = []
+
+  // Track which MCP clients have had elicitation handlers registered
+  const elicitationRegistered = new Set<string>()
+
+  /**
+   * Register elicitation request/completion handlers on connected MCP clients
+   * that haven't been registered yet. SDK MCP servers are excluded because they
+   * route through SdkControlClientTransport. Hooks run first (matching REPL
+   * behavior); if no hook responds, the request is forwarded to the SDK
+   * consumer via the control protocol.
+   *
+   * @param clients - current MCP connections; only `connected`, non-SDK ones
+   *   not already in `elicitationRegistered` get handlers installed.
+   */
+  function registerElicitationHandlers(clients: MCPServerConnection[]): void {
+    for (const connection of clients) {
+      if (
+        connection.type !== 'connected' ||
+        elicitationRegistered.has(connection.name)
+      ) {
+        continue
+      }
+      // Skip SDK MCP servers — elicitation flows through SdkControlClientTransport
+      if (connection.config.type === 'sdk') {
+        continue
+      }
+      const serverName = connection.name
+
+      // Wrapped in try/catch because setRequestHandler throws if the client wasn't
+      // created with elicitation capability declared (e.g., SDK-created clients).
+      try {
+        connection.client.setRequestHandler(
+          ElicitRequestSchema,
+          async (request, extra) => {
+            logMCPDebug(
+              serverName,
+              `Elicitation request received in print mode: ${jsonStringify(request)}`,
+            )
+
+            // Anything other than an explicit 'url' request is treated as a form.
+            const mode = request.params.mode === 'url' ? 'url' : 'form'
+
+            logEvent('tengu_mcp_elicitation_shown', {
+              mode: mode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            })
+
+            // Run elicitation hooks first — they can provide a response programmatically
+            const hookResponse = await runElicitationHooks(
+              serverName,
+              request.params,
+              extra.signal,
+            )
+            if (hookResponse) {
+              logMCPDebug(
+                serverName,
+                `Elicitation resolved by hook: ${jsonStringify(hookResponse)}`,
+              )
+              logEvent('tengu_mcp_elicitation_response', {
+                mode: mode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+                action:
+                  hookResponse.action as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              })
+              return hookResponse
+            }
+
+            // Delegate to SDK consumer via control protocol. These params are
+            // optional in the request shape, so probe with `in` before reading
+            // and leave absent fields undefined.
+            const url =
+              'url' in request.params
+                ? (request.params.url as string)
+                : undefined
+            const requestedSchema =
+              'requestedSchema' in request.params
+                ? (request.params.requestedSchema as
+                    | Record<string, unknown>
+                    | undefined)
+                : undefined
+
+            const elicitationId =
+              'elicitationId' in request.params
+                ? (request.params.elicitationId as string | undefined)
+                : undefined
+
+            // Blocks until the SDK consumer answers (or extra.signal aborts).
+            const rawResult = await structuredIO.handleElicitation(
+              serverName,
+              request.params.message,
+              requestedSchema,
+              extra.signal,
+              mode,
+              url,
+              elicitationId,
+            )
+
+            // Result hooks get the last word — they may rewrite the consumer's
+            // response before it is returned to the MCP server.
+            const result = await runElicitationResultHooks(
+              serverName,
+              rawResult,
+              extra.signal,
+              mode,
+              elicitationId,
+            )
+
+            logEvent('tengu_mcp_elicitation_response', {
+              mode: mode as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+              action:
+                result.action as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+            })
+            return result
+          },
+        )
+
+        // Surface completion notifications to SDK consumers (URL mode)
+        connection.client.setNotificationHandler(
+          ElicitationCompleteNotificationSchema,
+          notification => {
+            const { elicitationId } = notification.params
+            logMCPDebug(
+              serverName,
+              `Elicitation completion notification: ${elicitationId}`,
+            )
+            // Fire-and-forget: notification hooks must not block the handler.
+            void executeNotificationHooks({
+              message: `MCP server "${serverName}" confirmed elicitation ${elicitationId} complete`,
+              notificationType: 'elicitation_complete',
+            })
+            output.enqueue({
+              type: 'system',
+              subtype: 'elicitation_complete',
+              mcp_server_name: serverName,
+              elicitation_id: elicitationId,
+              uuid: randomUUID(),
+              session_id: getSessionId(),
+            })
+          },
+        )
+
+        // Mark only after both handlers were installed successfully.
+        elicitationRegistered.add(serverName)
+      } catch {
+        // setRequestHandler throws if the client wasn't created with
+        // elicitation capability — skip silently
+      }
+    }
+  }
+
+  /**
+   * Reconcile SDK MCP clients with the current sdkMcpConfigs: reconnects when
+   * servers were added/removed, or when an existing SDK client is still
+   * 'pending' or landed in 'failed' and needs another attempt. Mutates the
+   * closure bindings sdkClients/sdkTools and mirrors the resulting tool list
+   * into appState.mcp.tools so subagents can see it.
+   */
+  async function updateSdkMcp() {
+    // Check if SDK MCP servers need to be updated (new servers added or removed)
+    const currentServerNames = new Set(Object.keys(sdkMcpConfigs))
+    const connectedServerNames = new Set(sdkClients.map(c => c.name))
+
+    // Check if there are any differences (additions or removals)
+    const hasNewServers = Array.from(currentServerNames).some(
+      name => !connectedServerNames.has(name),
+    )
+    const hasRemovedServers = Array.from(connectedServerNames).some(
+      name => !currentServerNames.has(name),
+    )
+    // Check if any SDK clients are pending and need to be upgraded
+    const hasPendingSdkClients = sdkClients.some(c => c.type === 'pending')
+    // Check if any SDK clients failed their handshake and need to be retried.
+    // Without this, a client that lands in 'failed' (e.g. handshake timeout on
+    // a WS reconnect race) stays failed forever — its name satisfies the
+    // connectedServerNames diff but it contributes zero tools.
+    const hasFailedSdkClients = sdkClients.some(c => c.type === 'failed')
+
+    const haveServersChanged =
+      hasNewServers ||
+      hasRemovedServers ||
+      hasPendingSdkClients ||
+      hasFailedSdkClients
+
+    if (haveServersChanged) {
+      // Clean up removed servers (sequentially; only connected ones have
+      // a cleanup handle to release).
+      for (const client of sdkClients) {
+        if (!currentServerNames.has(client.name)) {
+          if (client.type === 'connected') {
+            await client.cleanup()
+          }
+        }
+      }
+
+      // Re-initialize all SDK MCP servers with current config
+      const sdkSetup = await setupSdkMcpClients(
+        sdkMcpConfigs,
+        (serverName, message) =>
+          structuredIO.sendMcpMessage(serverName, message),
+      )
+      sdkClients = sdkSetup.clients
+      sdkTools = sdkSetup.tools
+
+      // Store SDK MCP tools in appState so subagents can access them via
+      // assembleToolPool. Only tools are stored here — SDK clients are already
+      // merged separately in the query loop (allMcpClients) and mcp_status handler.
+      // Use both old (connectedServerNames) and new (currentServerNames) to remove
+      // stale SDK tools when servers are added or removed.
+      const allSdkNames = uniq([...connectedServerNames, ...currentServerNames])
+      setAppState(prev => ({
+        ...prev,
+        mcp: {
+          ...prev.mcp,
+          tools: [
+            ...prev.mcp.tools.filter(
+              t =>
+                !allSdkNames.some(name =>
+                  t.name.startsWith(getMcpPrefix(name)),
+                ),
+            ),
+            ...sdkTools,
+          ],
+        },
+      }))
+
+      // Set up the special internal VSCode MCP server if necessary.
+      setupVscodeSdkMcp(sdkClients)
+    }
+  }
+
+  void updateSdkMcp()
+
+  // State for dynamically added MCP servers (via mcp_set_servers control message)
+  // These are separate from SDK MCP servers and support all transport types
+  let dynamicMcpState: DynamicMcpState = {
+    clients: [],
+    tools: [],
+    configs: {},
+  }
+
+  // Shared tool assembly for ask() and the get_context_usage control request.
+  // Reads the mutable sdkTools/dynamicMcpState closure bindings on every call
+  // so both call sites observe late-connecting servers.
+  const buildAllTools = (appState: AppState): Tools => {
+    const { toolPermissionContext } = appState
+    const pool = assembleToolPool(toolPermissionContext, appState.mcp.tools)
+    const merged = mergeAndFilterTools(
+      [...tools, ...sdkTools, ...dynamicMcpState.tools],
+      pool,
+      toolPermissionContext.mode,
+    )
+    let result = uniqBy(merged, 'name')
+    const promptToolName = options.permissionPromptToolName
+    if (promptToolName) {
+      // The permission-prompt tool is SDK plumbing, not a model-visible tool.
+      result = result.filter(tool => !toolMatchesName(tool, promptToolName))
+    }
+    const initJsonSchema = getInitJsonSchema()
+    if (initJsonSchema && !options.jsonSchema) {
+      // A schema from the init handshake gets a synthetic structured-output
+      // tool, unless an explicit jsonSchema option already covers it.
+      const synthetic = createSyntheticOutputTool(initJsonSchema)
+      if ('tool' in synthetic) {
+        result = [...result, synthetic.tool]
+      }
+    }
+    return result
+  }
+
+  // Bridge handle for remote-control (SDK control message).
+  // Mirrors the REPL's useReplBridge hook: the handle is created when
+  // `remote_control` is enabled and torn down when disabled.
+  let bridgeHandle: ReplBridgeHandle | null = null
+  // Cursor into mutableMessages — tracks how far we've forwarded.
+  // Same index-based diff as useReplBridge's lastWrittenIndexRef.
+  let bridgeLastForwardedIndex = 0
+
+  // Forward new transcript messages to the remote-control bridge.
+  // Invoked incrementally during a turn (keeps claude.ai updated and alive
+  // through permission waits) and once more after the turn completes.
+  //
+  // The index cursor is only a pre-filter that avoids re-scanning messages
+  // already sent; writeMessages dedupes by UUID on its own
+  // (initialMessageUUIDs, recentPostedUUIDs).
+  function forwardMessagesToBridge(): void {
+    const handle = bridgeHandle
+    if (!handle) return
+    // Compaction can truncate mutableMessages — clamp the cursor so the
+    // slice below never starts past the end.
+    const from = Math.min(bridgeLastForwardedIndex, mutableMessages.length)
+    bridgeLastForwardedIndex = mutableMessages.length
+    const pending = mutableMessages
+      .slice(from)
+      .filter(m => m.type === 'user' || m.type === 'assistant')
+    if (pending.length === 0) return
+    handle.writeMessages(pending)
+  }
+
+  // Helper to apply MCP server changes - used by both mcp_set_servers control message
+  // and background plugin installation.
+  // NOTE: Nested function required - mutates closure state (sdkMcpConfigs, sdkClients, etc.)
+  // The chain is seeded with an already-resolved empty result so the first
+  // real caller simply chains onto it.
+  let mcpChangesPromise: Promise<{
+    response: SDKControlMcpSetServersResponse
+    sdkServersChanged: boolean
+  }> = Promise.resolve({
+    response: {
+      added: [] as string[],
+      removed: [] as string[],
+      errors: {} as Record<string, string>,
+    },
+    sdkServersChanged: false,
+  })
+
+  /**
+   * Apply a new MCP server map, updating SDK and dynamic MCP closure state.
+   * Calls are serialized through mcpChangesPromise so concurrent callers
+   * cannot interleave their mutations of the shared state.
+   */
+  function applyMcpServerChanges(
+    servers: Record<string, McpServerConfigForProcessTransport>,
+  ): Promise<{
+    response: SDKControlMcpSetServersResponse
+    sdkServersChanged: boolean
+  }> {
+    // Serialize calls to prevent race conditions between concurrent callers
+    // (background plugin install and mcp_set_servers control messages)
+    const doWork = async (): Promise<{
+      response: SDKControlMcpSetServersResponse
+      sdkServersChanged: boolean
+    }> => {
+      const oldSdkClientNames = new Set(sdkClients.map(c => c.name))
+
+      const result = await handleMcpSetServers(
+        servers,
+        { configs: sdkMcpConfigs, clients: sdkClients, tools: sdkTools },
+        dynamicMcpState,
+        setAppState,
+      )
+
+      // Update SDK state (need to mutate sdkMcpConfigs since it's shared)
+      for (const key of Object.keys(sdkMcpConfigs)) {
+        delete sdkMcpConfigs[key]
+      }
+      Object.assign(sdkMcpConfigs, result.newSdkState.configs)
+      sdkClients = result.newSdkState.clients
+      sdkTools = result.newSdkState.tools
+      dynamicMcpState = result.newDynamicState
+
+      // Keep appState.mcp.tools in sync so subagents can see SDK MCP tools.
+      // Use both old and new SDK client names to remove stale tools.
+      if (result.sdkServersChanged) {
+        const newSdkClientNames = new Set(sdkClients.map(c => c.name))
+        const allSdkNames = uniq([...oldSdkClientNames, ...newSdkClientNames])
+        setAppState(prev => ({
+          ...prev,
+          mcp: {
+            ...prev.mcp,
+            tools: [
+              ...prev.mcp.tools.filter(
+                t =>
+                  !allSdkNames.some(name =>
+                    t.name.startsWith(getMcpPrefix(name)),
+                  ),
+              ),
+              ...sdkTools,
+            ],
+          },
+        }))
+      }
+
+      return {
+        response: result.response,
+        sdkServersChanged: result.sdkServersChanged,
+      }
+    }
+
+    // doWork is BOTH the fulfillment and rejection handler: a failed
+    // predecessor must not wedge the queue, and this call still runs.
+    mcpChangesPromise = mcpChangesPromise.then(doWork, doWork)
+    return mcpChangesPromise
+  }
+
+  // Build McpServerStatus[] for control responses. Shared by mcp_status and
+  // reload_plugins handlers. Reads closure state: sdkClients, dynamicMcpState.
+  function buildMcpServerStatuses(): McpServerStatus[] {
+    const currentAppState = getAppState()
+    const currentMcpClients = currentAppState.mcp.clients
+    const allMcpTools = uniqBy(
+      [...currentAppState.mcp.tools, ...dynamicMcpState.tools],
+      'name',
+    )
+    // Dynamic clients are appended only when no app-state or SDK client
+    // already claims the same name.
+    const existingNames = new Set([
+      ...currentMcpClients.map(c => c.name),
+      ...sdkClients.map(c => c.name),
+    ])
+    return [
+      ...currentMcpClients,
+      ...sdkClients,
+      ...dynamicMcpState.clients.filter(c => !existingNames.has(c.name)),
+    ].map(connection => {
+      // NOTE(review): `config` stays undefined for transport types without a
+      // branch below (e.g. 'sdk') — presumably intentional since they have no
+      // externally meaningful config; confirm consumers tolerate undefined.
+      let config
+      if (
+        connection.config.type === 'sse' ||
+        connection.config.type === 'http'
+      ) {
+        config = {
+          type: connection.config.type,
+          url: connection.config.url,
+          headers: connection.config.headers,
+          oauth: connection.config.oauth,
+        }
+      } else if (connection.config.type === 'claudeai-proxy') {
+        config = {
+          type: 'claudeai-proxy' as const,
+          url: connection.config.url,
+          id: connection.config.id,
+        }
+      } else if (
+        connection.config.type === 'stdio' ||
+        connection.config.type === undefined
+      ) {
+        config = {
+          type: 'stdio' as const,
+          command: connection.config.command,
+          args: connection.config.args,
+        }
+      }
+      // `|| undefined` collapses false to undefined so falsy annotation
+      // values are omitted from the status instead of serialized as false.
+      const serverTools =
+        connection.type === 'connected'
+          ? filterToolsByServer(allMcpTools, connection.name).map(tool => ({
+              name: tool.mcpInfo?.toolName ?? tool.name,
+              annotations: {
+                readOnly: tool.isReadOnly({}) || undefined,
+                destructive: tool.isDestructive?.({}) || undefined,
+                openWorld: tool.isOpenWorld?.({}) || undefined,
+              },
+            }))
+          : undefined
+      // Capabilities passthrough with allowlist pre-filter. The IDE reads
+      // experimental['claude/channel'] to decide whether to show the
+      // Enable-channel prompt — only echo it if channel_enable would
+      // actually pass the allowlist. Not a security boundary (the
+      // handler re-runs the full gate); just avoids dead buttons.
+      let capabilities: { experimental?: Record<string, unknown> } | undefined
+      if (
+        (feature('KAIROS') || feature('KAIROS_CHANNELS')) &&
+        connection.type === 'connected' &&
+        connection.capabilities.experimental
+      ) {
+        const exp = { ...connection.capabilities.experimental }
+        if (
+          exp['claude/channel'] &&
+          (!isChannelsEnabled() ||
+            !isChannelAllowlisted(connection.config.pluginSource))
+        ) {
+          delete exp['claude/channel']
+        }
+        if (Object.keys(exp).length > 0) {
+          capabilities = { experimental: exp }
+        }
+      }
+      return {
+        name: connection.name,
+        status: connection.type,
+        serverInfo:
+          connection.type === 'connected' ? connection.serverInfo : undefined,
+        error: connection.type === 'failed' ? connection.error : undefined,
+        config,
+        scope: connection.config.scope,
+        tools: serverTools,
+        capabilities,
+      }
+    })
+  }
+
+  // NOTE: Nested function required - needs closure access to applyMcpServerChanges and updateSdkMcp
+  async function installPluginsAndApplyMcpInBackground(): Promise<void> {
+    try {
+      // Join point for user settings (fired at runHeadless entry) and managed
+      // settings (fired in main.tsx preAction). downloadUserSettings() caches
+      // its promise so this awaits the same in-flight request.
+      await Promise.all([
+        feature('DOWNLOAD_USER_SETTINGS') &&
+        (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) || getIsRemoteMode())
+          ? withDiagnosticsTiming('headless_user_settings_download', () =>
+              downloadUserSettings(),
+            )
+          : Promise.resolve(),
+        withDiagnosticsTiming('headless_managed_settings_wait', () =>
+          waitForRemoteManagedSettingsToLoad(),
+        ),
+      ])
+
+      const pluginsInstalled = await installPluginsForHeadless()
+
+      if (pluginsInstalled) {
+        await applyPluginMcpDiff()
+      }
+    } catch (error) {
+      logError(error)
+    }
+  }
+
+  // Background plugin installation for all headless users
+  // Installs marketplaces from extraKnownMarketplaces and missing enabled plugins
+  // CLAUDE_CODE_SYNC_PLUGIN_INSTALL=true: resolved in run() before the first
+  // query so plugins are guaranteed available on the first ask().
+  let pluginInstallPromise: Promise<void> | null = null
+  // --bare / SIMPLE: skip plugin install. Scripted calls don't add plugins
+  // mid-session; the next interactive run reconciles.
+  if (!isBareMode()) {
+    if (isEnvTruthy(process.env.CLAUDE_CODE_SYNC_PLUGIN_INSTALL)) {
+      pluginInstallPromise = installPluginsAndApplyMcpInBackground()
+    } else {
+      void installPluginsAndApplyMcpInBackground()
+    }
+  }
+
  // Idle timeout management
  // The callback is an "is idle?" probe: it reports whether no run() turn
  // is currently in flight. NOTE(review): assumed semantics of
  // createIdleTimeoutManager's predicate — confirm against its definition.
  const idleTimeout = createIdleTimeoutManager(() => !running)

  // Mutable commands and agents for hot reloading
  // Local mutable refs (headless analogue of AppState in the REPL), updated
  // by refreshPluginState() and the skill-change subscription, and read
  // fresh on every turn by the query loop.
  let currentCommands = commands
  let currentAgents = agents
+
+  // Clear all plugin-related caches, reload commands/agents/hooks.
+  // Called after CLAUDE_CODE_SYNC_PLUGIN_INSTALL completes (before first query)
+  // and after non-sync background install finishes.
+  // refreshActivePlugins calls clearAllCaches() which is required because
+  // loadAllPlugins() may have run during main.tsx startup BEFORE managed
+  // settings were fetched. Without clearing, getCommands() would rebuild
+  // from a stale plugin list.
+  async function refreshPluginState(): Promise<void> {
+    // refreshActivePlugins handles the full cache sweep (clearAllCaches),
+    // reloads all plugin component loaders, writes AppState.plugins +
+    // AppState.agentDefinitions, registers hooks, and bumps mcp.pluginReconnectKey.
+    const { agentDefinitions: freshAgentDefs } =
+      await refreshActivePlugins(setAppState)
+
+    // Headless-specific: currentCommands/currentAgents are local mutable refs
+    // captured by the query loop (REPL uses AppState instead). getCommands is
+    // fresh because refreshActivePlugins cleared its cache.
+    currentCommands = await getCommands(cwd())
+
+    // Preserve SDK-provided agents (--agents CLI flag or SDK initialize
+    // control_request) — both inject via parseAgentsFromJson with
+    // source='flagSettings'. loadMarkdownFilesForSubdir never assigns this
+    // source, so it cleanly discriminates "injected, not disk-loadable".
+    //
+    // The previous filter used a negative set-diff (!freshAgentTypes.has(a))
+    // which also matched plugin agents that were in the poisoned initial
+    // currentAgents but correctly excluded from freshAgentDefs after managed
+    // settings applied — leaking policy-blocked agents into the init message.
+    // See gh-23085: isBridgeEnabled() at Commander-definition time poisoned
+    // the settings cache before setEligibility(true) ran.
+    const sdkAgents = currentAgents.filter(a => a.source === 'flagSettings')
+    currentAgents = [...freshAgentDefs.allAgents, ...sdkAgents]
+  }
+
+  // Re-diff MCP configs after plugin state changes. Filters to
+  // process-transport-supported types and carries SDK-mode servers through
+  // so applyMcpServerChanges' diff doesn't close their transports.
+  // Nested: needs closure access to sdkMcpConfigs, applyMcpServerChanges,
+  // updateSdkMcp.
+  async function applyPluginMcpDiff(): Promise<void> {
+    const { servers: newConfigs } = await getAllMcpConfigs()
+    const supportedConfigs: Record<string, McpServerConfigForProcessTransport> =
+      {}
+    for (const [name, config] of Object.entries(newConfigs)) {
+      const type = config.type
+      if (
+        type === undefined ||
+        type === 'stdio' ||
+        type === 'sse' ||
+        type === 'http' ||
+        type === 'sdk'
+      ) {
+        supportedConfigs[name] = config
+      }
+    }
+    for (const [name, config] of Object.entries(sdkMcpConfigs)) {
+      if (config.type === 'sdk' && !(name in supportedConfigs)) {
+        supportedConfigs[name] = config
+      }
+    }
+    const { response, sdkServersChanged } =
+      await applyMcpServerChanges(supportedConfigs)
+    if (sdkServersChanged) {
+      void updateSdkMcp()
+    }
+    logForDebugging(
+      `Headless MCP refresh: added=${response.added.length}, removed=${response.removed.length}`,
+    )
+  }
+
+  // Subscribe to skill changes for hot reloading
+  const unsubscribeSkillChanges = skillChangeDetector.subscribe(() => {
+    clearCommandsCache()
+    void getCommands(cwd()).then(newCommands => {
+      currentCommands = newCommands
+    })
+  })
+
+  // Proactive mode: schedule a tick to keep the model looping autonomously.
+  // setTimeout(0) yields to the event loop so pending stdin messages
+  // (interrupts, user messages) are processed before the tick fires.
+  const scheduleProactiveTick =
+    feature('PROACTIVE') || feature('KAIROS')
+      ? () => {
+          setTimeout(() => {
+            if (
+              !proactiveModule?.isProactiveActive() ||
+              proactiveModule.isProactivePaused() ||
+              inputClosed
+            ) {
+              return
+            }
+            const tickContent = `<${TICK_TAG}>${new Date().toLocaleTimeString()}</${TICK_TAG}>`
+            enqueue({
+              mode: 'prompt' as const,
+              value: tickContent,
+              uuid: randomUUID(),
+              priority: 'later',
+              isMeta: true,
+            })
+            void run()
+          }, 0)
+        }
+      : undefined
+
+  // Abort the current operation when a 'now' priority message arrives.
+  subscribeToCommandQueue(() => {
+    if (abortController && getCommandsByMaxPriority('now').length > 0) {
+      abortController.abort('interrupt')
+    }
+  })
+
+  const run = async () => {
+    if (running) {
+      return
+    }
+
+    running = true
+    runPhase = undefined
+    notifySessionStateChanged('running')
+    idleTimeout.stop()
+
+    headlessProfilerCheckpoint('run_entry')
+    // TODO(custom-tool-refactor): Should move to the init message, like browser
+
+    await updateSdkMcp()
+    headlessProfilerCheckpoint('after_updateSdkMcp')
+
+    // Resolve deferred plugin installation (CLAUDE_CODE_SYNC_PLUGIN_INSTALL).
+    // The promise was started eagerly so installation overlaps with other init.
+    // Awaiting here guarantees plugins are available before the first ask().
+    // If CLAUDE_CODE_SYNC_PLUGIN_INSTALL_TIMEOUT_MS is set, races against that
+    // deadline and proceeds without plugins on timeout (logging an error).
+    if (pluginInstallPromise) {
+      const timeoutMs = parseInt(
+        process.env.CLAUDE_CODE_SYNC_PLUGIN_INSTALL_TIMEOUT_MS || '',
+        10,
+      )
+      if (timeoutMs > 0) {
+        const timeout = sleep(timeoutMs).then(() => 'timeout' as const)
+        const result = await Promise.race([pluginInstallPromise, timeout])
+        if (result === 'timeout') {
+          logError(
+            new Error(
+              `CLAUDE_CODE_SYNC_PLUGIN_INSTALL: plugin installation timed out after ${timeoutMs}ms`,
+            ),
+          )
+          logEvent('tengu_sync_plugin_install_timeout', {
+            timeout_ms: timeoutMs,
+          })
+        }
+      } else {
+        await pluginInstallPromise
+      }
+      pluginInstallPromise = null
+
+      // Refresh commands, agents, and hooks now that plugins are installed
+      await refreshPluginState()
+
+      // Set up hot-reload for plugin hooks now that the initial install is done.
+      // In sync-install mode, setup.ts skips this to avoid racing with the install.
+      const { setupPluginHookHotReload } = await import(
+        '../utils/plugins/loadPluginHooks.js'
+      )
+      setupPluginHookHotReload()
+    }
+
+    // Only main-thread commands (agentId===undefined) — subagent
+    // notifications are drained by the subagent's mid-turn gate in query.ts.
+    // Defined outside the try block so it's accessible in the post-finally
+    // queue re-checks at the bottom of run().
+    const isMainThread = (cmd: QueuedCommand) => cmd.agentId === undefined
+
+    try {
+      let command: QueuedCommand | undefined
+      let waitingForAgents = false
+
+      // Extract command processing into a named function for the do-while pattern.
+      // Drains the queue, batching consecutive prompt-mode commands into one
+      // ask() call so messages that queued up during a long turn coalesce
+      // into a single follow-up turn instead of N separate turns.
+      const drainCommandQueue = async () => {
+        while ((command = dequeue(isMainThread))) {
+          if (
+            command.mode !== 'prompt' &&
+            command.mode !== 'orphaned-permission' &&
+            command.mode !== 'task-notification'
+          ) {
+            throw new Error(
+              'only prompt commands are supported in streaming mode',
+            )
+          }
+
+          // Non-prompt commands (task-notification, orphaned-permission) carry
+          // side effects or orphanedPermission state, so they process singly.
+          // Prompt commands greedily collect followers with matching workload.
+          const batch: QueuedCommand[] = [command]
+          if (command.mode === 'prompt') {
+            while (canBatchWith(command, peek(isMainThread))) {
+              batch.push(dequeue(isMainThread)!)
+            }
+            if (batch.length > 1) {
+              command = {
+                ...command,
+                value: joinPromptValues(batch.map(c => c.value)),
+                uuid: batch.findLast(c => c.uuid)?.uuid ?? command.uuid,
+              }
+            }
+          }
+          const batchUuids = batch.map(c => c.uuid).filter(u => u !== undefined)
+
+          // QueryEngine will emit a replay for command.uuid (the last uuid in
+          // the batch) via its messagesToAck path. Emit replays here for the
+          // rest so consumers that track per-uuid delivery (clank's
+          // asyncMessages footer, CCR) see an ack for every message they sent,
+          // not just the one that survived the merge.
+          if (options.replayUserMessages && batch.length > 1) {
+            for (const c of batch) {
+              if (c.uuid && c.uuid !== command.uuid) {
+                output.enqueue({
+                  type: 'user',
+                  message: { role: 'user', content: c.value },
+                  session_id: getSessionId(),
+                  parent_tool_use_id: null,
+                  uuid: c.uuid,
+                  isReplay: true,
+                } satisfies SDKUserMessageReplay)
+              }
+            }
+          }
+
+          // Combine all MCP clients. appState.mcp is populated incrementally
+          // per-server by main.tsx (mirrors useManageMCPConnections). Reading
+          // fresh per-command means late-connecting servers are visible on the
+          // next turn. registerElicitationHandlers is idempotent (tracking set).
+          const appState = getAppState()
+          const allMcpClients = [
+            ...appState.mcp.clients,
+            ...sdkClients,
+            ...dynamicMcpState.clients,
+          ]
+          registerElicitationHandlers(allMcpClients)
+          // Channel handlers for servers allowlisted via --channels at
+          // construction time (or enableChannel() mid-session). Runs every
+          // turn like registerElicitationHandlers — idempotent per-client
+          // (setNotificationHandler replaces, not stacks) and no-ops for
+          // non-allowlisted servers (one feature-flag check).
+          for (const client of allMcpClients) {
+            reregisterChannelHandlerAfterReconnect(client)
+          }
+
+          const allTools = buildAllTools(appState)
+
+          for (const uuid of batchUuids) {
+            notifyCommandLifecycle(uuid, 'started')
+          }
+
+          // Task notifications arrive when background agents complete.
+          // Emit an SDK system event for SDK consumers, then fall through
+          // to ask() so the model sees the agent result and can act on it.
+          // This matches TUI behavior where useQueueProcessor always feeds
+          // notifications to the model regardless of coordinator mode.
+          if (command.mode === 'task-notification') {
+            const notificationText =
+              typeof command.value === 'string' ? command.value : ''
+            // Parse the XML-formatted notification
+            const taskIdMatch = notificationText.match(
+              /<task-id>([^<]+)<\/task-id>/,
+            )
+            const toolUseIdMatch = notificationText.match(
+              /<tool-use-id>([^<]+)<\/tool-use-id>/,
+            )
+            const outputFileMatch = notificationText.match(
+              /<output-file>([^<]+)<\/output-file>/,
+            )
+            const statusMatch = notificationText.match(
+              /<status>([^<]+)<\/status>/,
+            )
+            const summaryMatch = notificationText.match(
+              /<summary>([^<]+)<\/summary>/,
+            )
+
+            const isValidStatus = (
+              s: string | undefined,
+            ): s is 'completed' | 'failed' | 'stopped' | 'killed' =>
+              s === 'completed' ||
+              s === 'failed' ||
+              s === 'stopped' ||
+              s === 'killed'
+            const rawStatus = statusMatch?.[1]
+            const status = isValidStatus(rawStatus)
+              ? rawStatus === 'killed'
+                ? 'stopped'
+                : rawStatus
+              : 'completed'
+
+            const usageMatch = notificationText.match(
+              /<usage>([\s\S]*?)<\/usage>/,
+            )
+            const usageContent = usageMatch?.[1] ?? ''
+            const totalTokensMatch = usageContent.match(
+              /<total_tokens>(\d+)<\/total_tokens>/,
+            )
+            const toolUsesMatch = usageContent.match(
+              /<tool_uses>(\d+)<\/tool_uses>/,
+            )
+            const durationMsMatch = usageContent.match(
+              /<duration_ms>(\d+)<\/duration_ms>/,
+            )
+
+            // Only emit a task_notification SDK event when a <status> tag is
+            // present — that means this is a terminal notification (completed/
+            // failed/stopped). Stream events from enqueueStreamEvent carry no
+            // <status> (they're progress pings); emitting them here would
+            // default to 'completed' and falsely close the task for SDK
+            // consumers. Terminal bookends are now emitted directly via
+            // emitTaskTerminatedSdk, so skipping statusless events is safe.
+            if (statusMatch) {
+              output.enqueue({
+                type: 'system',
+                subtype: 'task_notification',
+                task_id: taskIdMatch?.[1] ?? '',
+                tool_use_id: toolUseIdMatch?.[1],
+                status,
+                output_file: outputFileMatch?.[1] ?? '',
+                summary: summaryMatch?.[1] ?? '',
+                usage:
+                  totalTokensMatch && toolUsesMatch
+                    ? {
+                        total_tokens: parseInt(totalTokensMatch[1]!, 10),
+                        tool_uses: parseInt(toolUsesMatch[1]!, 10),
+                        duration_ms: durationMsMatch
+                          ? parseInt(durationMsMatch[1]!, 10)
+                          : 0,
+                      }
+                    : undefined,
+                session_id: getSessionId(),
+                uuid: randomUUID(),
+              })
+            }
+            // No continue -- fall through to ask() so the model processes the result
+          }
+
+          const input = command.value
+
+          if (structuredIO instanceof RemoteIO && command.mode === 'prompt') {
+            logEvent('tengu_bridge_message_received', {
+              is_repl: false,
+            })
+          }
+
+          // Abort any in-flight suggestion generation and track acceptance
+          suggestionState.abortController?.abort()
+          suggestionState.abortController = null
+          suggestionState.pendingSuggestion = null
+          suggestionState.pendingLastEmittedEntry = null
+          if (suggestionState.lastEmitted) {
+            if (command.mode === 'prompt') {
+              // SDK user messages enqueue ContentBlockParam[], not a plain string
+              const inputText =
+                typeof input === 'string'
+                  ? input
+                  : (
+                      input.find(b => b.type === 'text') as
+                        | { type: 'text'; text: string }
+                        | undefined
+                    )?.text
+              if (typeof inputText === 'string') {
+                logSuggestionOutcome(
+                  suggestionState.lastEmitted.text,
+                  inputText,
+                  suggestionState.lastEmitted.emittedAt,
+                  suggestionState.lastEmitted.promptId,
+                  suggestionState.lastEmitted.generationRequestId,
+                )
+              }
+              suggestionState.lastEmitted = null
+            }
+          }
+
+          abortController = createAbortController()
+          const turnStartTime = feature('FILE_PERSISTENCE')
+            ? Date.now()
+            : undefined
+
+          headlessProfilerCheckpoint('before_ask')
+          startQueryProfile()
+          // Per-iteration ALS context so bg agents spawned inside ask()
+          // inherit workload across their detached awaits. In-process cron
+          // stamps cmd.workload; the SDK --workload flag is options.workload.
+          // const-capture: TS loses `while ((command = dequeue()))` narrowing
+          // inside the closure.
+          const cmd = command
+          await runWithWorkload(cmd.workload ?? options.workload, async () => {
+            for await (const message of ask({
+              commands: uniqBy(
+                [...currentCommands, ...appState.mcp.commands],
+                'name',
+              ),
+              prompt: input,
+              promptUuid: cmd.uuid,
+              isMeta: cmd.isMeta,
+              cwd: cwd(),
+              tools: allTools,
+              verbose: options.verbose,
+              mcpClients: allMcpClients,
+              thinkingConfig: options.thinkingConfig,
+              maxTurns: options.maxTurns,
+              maxBudgetUsd: options.maxBudgetUsd,
+              taskBudget: options.taskBudget,
+              canUseTool,
+              userSpecifiedModel: activeUserSpecifiedModel,
+              fallbackModel: options.fallbackModel,
+              jsonSchema: getInitJsonSchema() ?? options.jsonSchema,
+              mutableMessages,
+              getReadFileCache: () =>
+                pendingSeeds.size === 0
+                  ? readFileState
+                  : mergeFileStateCaches(readFileState, pendingSeeds),
+              setReadFileCache: cache => {
+                readFileState = cache
+                for (const [path, seed] of pendingSeeds.entries()) {
+                  const existing = readFileState.get(path)
+                  if (!existing || seed.timestamp > existing.timestamp) {
+                    readFileState.set(path, seed)
+                  }
+                }
+                pendingSeeds.clear()
+              },
+              customSystemPrompt: options.systemPrompt,
+              appendSystemPrompt: options.appendSystemPrompt,
+              getAppState,
+              setAppState,
+              abortController,
+              replayUserMessages: options.replayUserMessages,
+              includePartialMessages: options.includePartialMessages,
+              handleElicitation: (serverName, params, elicitSignal) =>
+                structuredIO.handleElicitation(
+                  serverName,
+                  params.message,
+                  undefined,
+                  elicitSignal,
+                  params.mode,
+                  params.url,
+                  'elicitationId' in params ? params.elicitationId : undefined,
+                ),
+              agents: currentAgents,
+              orphanedPermission: cmd.orphanedPermission,
+              setSDKStatus: status => {
+                output.enqueue({
+                  type: 'system',
+                  subtype: 'status',
+                  status,
+                  session_id: getSessionId(),
+                  uuid: randomUUID(),
+                })
+              },
+            })) {
+              // Forward messages to bridge incrementally (mid-turn) so
+              // claude.ai sees progress and the connection stays alive
+              // while blocked on permission requests.
+              forwardMessagesToBridge()
+
+              if (message.type === 'result') {
+                // Flush pending SDK events so they appear before result on the stream.
+                for (const event of drainSdkEvents()) {
+                  output.enqueue(event)
+                }
+
+                // Hold-back: don't emit result while background agents are running
+                const currentState = getAppState()
+                if (
+                  getRunningTasks(currentState).some(
+                    t =>
+                      (t.type === 'local_agent' ||
+                        t.type === 'local_workflow') &&
+                      isBackgroundTask(t),
+                  )
+                ) {
+                  heldBackResult = message
+                } else {
+                  heldBackResult = null
+                  output.enqueue(message)
+                }
+              } else {
+                // Flush SDK events (task_started, task_progress) so background
+                // agent progress is streamed in real-time, not batched until result.
+                for (const event of drainSdkEvents()) {
+                  output.enqueue(event)
+                }
+                output.enqueue(message)
+              }
+            }
+          }) // end runWithWorkload
+
+          for (const uuid of batchUuids) {
+            notifyCommandLifecycle(uuid, 'completed')
+          }
+
+          // Forward messages to bridge after each turn
+          forwardMessagesToBridge()
+          bridgeHandle?.sendResult()
+
+          if (feature('FILE_PERSISTENCE') && turnStartTime !== undefined) {
+            void executeFilePersistence(
+              turnStartTime,
+              abortController.signal,
+              result => {
+                output.enqueue({
+                  type: 'system' as const,
+                  subtype: 'files_persisted' as const,
+                  files: result.files,
+                  failed: result.failed,
+                  processed_at: new Date().toISOString(),
+                  uuid: randomUUID(),
+                  session_id: getSessionId(),
+                })
+              },
+            )
+          }
+
+          // Generate and emit prompt suggestion for SDK consumers
+          if (
+            options.promptSuggestions &&
+            !isEnvDefinedFalsy(process.env.CLAUDE_CODE_ENABLE_PROMPT_SUGGESTION)
+          ) {
+            // TS narrows suggestionState to never in the while loop body;
+            // cast via unknown to reset narrowing.
+            const state = suggestionState as unknown as typeof suggestionState
+            state.abortController?.abort()
+            const localAbort = new AbortController()
+            suggestionState.abortController = localAbort
+
+            const cacheSafeParams = getLastCacheSafeParams()
+            if (!cacheSafeParams) {
+              logSuggestionSuppressed(
+                'sdk_no_params',
+                undefined,
+                undefined,
+                'sdk',
+              )
+            } else {
+              // Use a ref object so the IIFE's finally can compare against its own
+              // promise without a self-reference (which upsets TypeScript's flow analysis).
+              const ref: { promise: Promise<void> | null } = { promise: null }
+              ref.promise = (async () => {
+                try {
+                  const result = await tryGenerateSuggestion(
+                    localAbort,
+                    mutableMessages,
+                    getAppState,
+                    cacheSafeParams,
+                    'sdk',
+                  )
+                  if (!result || localAbort.signal.aborted) return
+                  const suggestionMsg = {
+                    type: 'prompt_suggestion' as const,
+                    suggestion: result.suggestion,
+                    uuid: randomUUID(),
+                    session_id: getSessionId(),
+                  }
+                  const lastEmittedEntry = {
+                    text: result.suggestion,
+                    emittedAt: Date.now(),
+                    promptId: result.promptId,
+                    generationRequestId: result.generationRequestId,
+                  }
+                  // Defer emission if the result is being held for background agents,
+                  // so that prompt_suggestion always arrives after result.
+                  // Only set lastEmitted when the suggestion is actually delivered
+                  // to the consumer; deferred suggestions may be discarded before
+                  // delivery if a new command arrives first.
+                  if (heldBackResult) {
+                    suggestionState.pendingSuggestion = suggestionMsg
+                    suggestionState.pendingLastEmittedEntry = {
+                      text: lastEmittedEntry.text,
+                      promptId: lastEmittedEntry.promptId,
+                      generationRequestId: lastEmittedEntry.generationRequestId,
+                    }
+                  } else {
+                    suggestionState.lastEmitted = lastEmittedEntry
+                    output.enqueue(suggestionMsg)
+                  }
+                } catch (error) {
+                  if (
+                    error instanceof Error &&
+                    (error.name === 'AbortError' ||
+                      error.name === 'APIUserAbortError')
+                  ) {
+                    logSuggestionSuppressed(
+                      'aborted',
+                      undefined,
+                      undefined,
+                      'sdk',
+                    )
+                    return
+                  }
+                  logError(toError(error))
+                } finally {
+                  if (suggestionState.inflightPromise === ref.promise) {
+                    suggestionState.inflightPromise = null
+                  }
+                }
+              })()
+              suggestionState.inflightPromise = ref.promise
+            }
+          }
+
+          // Log headless profiler metrics for this turn and start next turn
+          logHeadlessProfilerTurn()
+          logQueryProfileReport()
+          headlessProfilerStartTurn()
+        }
+      }
+
+      // Use a do-while loop to drain commands and then wait for any
+      // background agents that are still running. When agents complete,
+      // their notifications are enqueued and the loop re-drains.
+      do {
+        // Drain SDK events (task_started, task_progress) before command queue
+        // so progress events precede task_notification on the stream.
+        for (const event of drainSdkEvents()) {
+          output.enqueue(event)
+        }
+
+        runPhase = 'draining_commands'
+        await drainCommandQueue()
+
+        // Check for running background tasks before exiting.
+        // Exclude in_process_teammate — teammates are long-lived by design
+        // (status: 'running' for their whole lifetime, cleaned up by the
+        // shutdown protocol, not by transitioning to 'completed'). Waiting
+        // on them here loops forever (gh-30008). Same exclusion already
+        // exists at useBackgroundTaskNavigation.ts:55 for the same reason;
+        // L1839 above is already narrower (type === 'local_agent') so it
+        // doesn't hit this.
+        waitingForAgents = false
+        {
+          const state = getAppState()
+          const hasRunningBg = getRunningTasks(state).some(
+            t => isBackgroundTask(t) && t.type !== 'in_process_teammate',
+          )
+          const hasMainThreadQueued = peek(isMainThread) !== undefined
+          if (hasRunningBg || hasMainThreadQueued) {
+            waitingForAgents = true
+            if (!hasMainThreadQueued) {
+              runPhase = 'waiting_for_agents'
+              // No commands ready yet, wait for tasks to complete
+              await sleep(100)
+            }
+            // Loop back to drain any newly queued commands
+          }
+        }
+      } while (waitingForAgents)
+
+      if (heldBackResult) {
+        output.enqueue(heldBackResult)
+        heldBackResult = null
+        if (suggestionState.pendingSuggestion) {
+          output.enqueue(suggestionState.pendingSuggestion)
+          // Now that the suggestion is actually delivered, record it for acceptance tracking
+          if (suggestionState.pendingLastEmittedEntry) {
+            suggestionState.lastEmitted = {
+              ...suggestionState.pendingLastEmittedEntry,
+              emittedAt: Date.now(),
+            }
+            suggestionState.pendingLastEmittedEntry = null
+          }
+          suggestionState.pendingSuggestion = null
+        }
+      }
+    } catch (error) {
+      // Emit error result message before shutting down
+      // Write directly to structuredIO to ensure immediate delivery
+      try {
+        await structuredIO.write({
+          type: 'result',
+          subtype: 'error_during_execution',
+          duration_ms: 0,
+          duration_api_ms: 0,
+          is_error: true,
+          num_turns: 0,
+          stop_reason: null,
+          session_id: getSessionId(),
+          total_cost_usd: 0,
+          usage: EMPTY_USAGE,
+          modelUsage: {},
+          permission_denials: [],
+          uuid: randomUUID(),
+          errors: [
+            errorMessage(error),
+            ...getInMemoryErrors().map(_ => _.error),
+          ],
+        })
+      } catch {
+        // If we can't emit the error result, continue with shutdown anyway
+      }
+      suggestionState.abortController?.abort()
+      gracefulShutdownSync(1)
+      return
+    } finally {
+      runPhase = 'finally_flush'
+      // Flush pending internal events before going idle
+      await structuredIO.flushInternalEvents()
+      runPhase = 'finally_post_flush'
+      if (!isShuttingDown()) {
+        notifySessionStateChanged('idle')
+        // Drain so the idle session_state_changed SDK event (plus any
+        // terminal task_notification bookends emitted during bg-agent
+        // teardown) reach the output stream before we block on the next
+        // command. The do-while drain above only runs while
+        // waitingForAgents; once we're here the next drain would be the
+        // top of the next run(), which won't come if input is idle.
+        for (const event of drainSdkEvents()) {
+          output.enqueue(event)
+        }
+      }
+      running = false
+      // Start idle timer when we finish processing and are waiting for input
+      idleTimeout.start()
+    }
+
+    // Proactive tick: if proactive is active and queue is empty, inject a tick
+    if (
+      (feature('PROACTIVE') || feature('KAIROS')) &&
+      proactiveModule?.isProactiveActive() &&
+      !proactiveModule.isProactivePaused()
+    ) {
+      if (peek(isMainThread) === undefined && !inputClosed) {
+        scheduleProactiveTick!()
+        return
+      }
+    }
+
+    // Re-check the queue after releasing the mutex. A message may have
+    // arrived (and called run()) between the last dequeue() returning
+    // undefined and `running = false` above. In that case the caller
+    // saw `running === true` and returned immediately, leaving the
+    // message stranded in the queue with no one to process it.
+    if (peek(isMainThread) !== undefined) {
+      void run()
+      return
+    }
+
+    // Check for unread teammate messages and process them
+    // This mirrors what useInboxPoller does in interactive REPL mode
+    // Poll until no more messages (teammates may still be working)
+    {
+      const currentAppState = getAppState()
+      const teamContext = currentAppState.teamContext
+
+      if (teamContext && isTeamLead(teamContext)) {
+        const agentName = 'team-lead'
+
+        // Poll for messages while teammates are active
+        // This is needed because teammates may send messages while we're waiting
+        // Keep polling until the team is shut down
+        const POLL_INTERVAL_MS = 500
+
+        while (true) {
+          // Check if teammates are still active
+          const refreshedState = getAppState()
+          const hasActiveTeammates =
+            hasActiveInProcessTeammates(refreshedState) ||
+            (refreshedState.teamContext &&
+              Object.keys(refreshedState.teamContext.teammates).length > 0)
+
+          if (!hasActiveTeammates) {
+            logForDebugging(
+              '[print.ts] No more active teammates, stopping poll',
+            )
+            break
+          }
+
+          const unread = await readUnreadMessages(
+            agentName,
+            refreshedState.teamContext?.teamName,
+          )
+
+          if (unread.length > 0) {
+            logForDebugging(
+              `[print.ts] Team-lead found ${unread.length} unread messages`,
+            )
+
+            // Mark as read immediately to avoid duplicate processing
+            await markMessagesAsRead(
+              agentName,
+              refreshedState.teamContext?.teamName,
+            )
+
+            // Process shutdown_approved messages - remove teammates from team file
+            // This mirrors what useInboxPoller does in interactive mode (lines 546-606)
+            const teamName = refreshedState.teamContext?.teamName
+            for (const m of unread) {
+              const shutdownApproval = isShutdownApproved(m.text)
+              if (shutdownApproval && teamName) {
+                const teammateToRemove = shutdownApproval.from
+                logForDebugging(
+                  `[print.ts] Processing shutdown_approved from ${teammateToRemove}`,
+                )
+
+                // Find the teammate ID by name
+                const teammateId = refreshedState.teamContext?.teammates
+                  ? Object.entries(refreshedState.teamContext.teammates).find(
+                      ([, t]) => t.name === teammateToRemove,
+                    )?.[0]
+                  : undefined
+
+                if (teammateId) {
+                  // Remove from team file
+                  removeTeammateFromTeamFile(teamName, {
+                    agentId: teammateId,
+                    name: teammateToRemove,
+                  })
+                  logForDebugging(
+                    `[print.ts] Removed ${teammateToRemove} from team file`,
+                  )
+
+                  // Unassign tasks owned by this teammate
+                  await unassignTeammateTasks(
+                    teamName,
+                    teammateId,
+                    teammateToRemove,
+                    'shutdown',
+                  )
+
+                  // Remove from teamContext in AppState
+                  setAppState(prev => {
+                    if (!prev.teamContext?.teammates) return prev
+                    if (!(teammateId in prev.teamContext.teammates)) return prev
+                    const { [teammateId]: _, ...remainingTeammates } =
+                      prev.teamContext.teammates
+                    return {
+                      ...prev,
+                      teamContext: {
+                        ...prev.teamContext,
+                        teammates: remainingTeammates,
+                      },
+                    }
+                  })
+                }
+              }
+            }
+
+            // Format messages same as useInboxPoller
+            const formatted = unread
+              .map(
+                (m: { from: string; text: string; color?: string }) =>
+                  `<${TEAMMATE_MESSAGE_TAG} teammate_id="${m.from}"${m.color ? ` color="${m.color}"` : ''}>\n${m.text}\n</${TEAMMATE_MESSAGE_TAG}>`,
+              )
+              .join('\n\n')
+
+            // Enqueue and process
+            enqueue({
+              mode: 'prompt',
+              value: formatted,
+              uuid: randomUUID(),
+            })
+            void run()
+            return // run() will come back here after processing
+          }
+
+          // No messages - check if we need to prompt for shutdown
+          // If input is closed and teammates are active, inject shutdown prompt once
+          if (inputClosed && !shutdownPromptInjected) {
+            shutdownPromptInjected = true
+            logForDebugging(
+              '[print.ts] Input closed with active teammates, injecting shutdown prompt',
+            )
+            enqueue({
+              mode: 'prompt',
+              value: SHUTDOWN_TEAM_PROMPT,
+              uuid: randomUUID(),
+            })
+            void run()
+            return // run() will come back here after processing
+          }
+
+          // Wait and check again
+          await sleep(POLL_INTERVAL_MS)
+        }
+      }
+    }
+
+    if (inputClosed) {
+      // Check for active swarm that needs shutdown
+      const hasActiveSwarm = await (async () => {
+        // Wait for any working in-process team members to finish
+        const currentAppState = getAppState()
+        if (hasWorkingInProcessTeammates(currentAppState)) {
+          await waitForTeammatesToBecomeIdle(setAppState, currentAppState)
+        }
+
+        // Re-fetch state after potential wait
+        const refreshedAppState = getAppState()
+        const refreshedTeamContext = refreshedAppState.teamContext
+        const hasTeamMembersNotCleanedUp =
+          refreshedTeamContext &&
+          Object.keys(refreshedTeamContext.teammates).length > 0
+
+        return (
+          hasTeamMembersNotCleanedUp ||
+          hasActiveInProcessTeammates(refreshedAppState)
+        )
+      })()
+
+      if (hasActiveSwarm) {
+        // Team members are idle or pane-based - inject prompt to shut down team
+        enqueue({
+          mode: 'prompt',
+          value: SHUTDOWN_TEAM_PROMPT,
+          uuid: randomUUID(),
+        })
+        void run()
+      } else {
+        // Wait for any in-flight push suggestion before closing the output stream.
+        if (suggestionState.inflightPromise) {
+          await Promise.race([suggestionState.inflightPromise, sleep(5000)])
+        }
+        suggestionState.abortController?.abort()
+        suggestionState.abortController = null
+        await finalizePendingAsyncHooks()
+        unsubscribeSkillChanges()
+        unsubscribeAuthStatus?.()
+        statusListeners.delete(rateLimitListener)
+        output.done()
+      }
+    }
+  }
+
+  // Set up UDS inbox callback so the query loop is kicked off
+  // when a message arrives via the UDS socket in headless mode.
+  if (feature('UDS_INBOX')) {
+    // CommonJS require (lint-suppressed) keeps the module load conditional on
+    // the feature flag — presumably to avoid loading udsMessaging when the
+    // flag is off. TODO confirm there is no top-level side effect we rely on.
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { setOnEnqueue } = require('../utils/udsMessaging.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    setOnEnqueue(() => {
+      // Once input has closed the session is draining/shutting down, so a
+      // late-arriving UDS message must not restart the query loop.
+      if (!inputClosed) {
+        void run()
+      }
+    })
+  }
+
+  // Cron scheduler: runs scheduled_tasks.json tasks in SDK/-p mode.
+  // Mirrors REPL's useScheduledTasks hook. Fired prompts enqueue + kick
+  // off run() directly — unlike REPL, there's no queue subscriber here
+  // that drains on enqueue while idle. The run() mutex makes this safe
+  // during an active turn: the call no-ops and the post-run recheck at
+  // the end of run() picks up the queued command.
+  let cronScheduler: import('../utils/cronScheduler.js').CronScheduler | null =
+    null
+  // Triple gate: feature flag, the lazily-loaded scheduler module, and the
+  // runtime Kairos cron switch must all be on before any scheduler exists.
+  if (
+    feature('AGENT_TRIGGERS') &&
+    cronSchedulerModule &&
+    cronGate?.isKairosCronEnabled()
+  ) {
+    cronScheduler = cronSchedulerModule.createCronScheduler({
+      onFire: prompt => {
+        // Drop fires after input close — the session is winding down and
+        // run() would have no consumer for the result.
+        if (inputClosed) return
+        enqueue({
+          mode: 'prompt',
+          value: prompt,
+          uuid: randomUUID(),
+          // 'later': presumably keeps cron-fired prompts behind user-queued
+          // commands — confirm against the queue's priority handling.
+          priority: 'later',
+          // System-generated — matches useScheduledTasks.ts REPL equivalent.
+          // Without this, messages.ts metaProp eval is {} → prompt leaks
+          // into visible transcript when cron fires mid-turn in -p mode.
+          isMeta: true,
+          // Threaded to cc_workload= in the billing-header attribution block
+          // so the API can serve cron requests at lower QoS. drainCommandQueue
+          // reads this per-iteration and hoists it into bootstrap state for
+          // the ask() call.
+          workload: WORKLOAD_CRON,
+        })
+        void run()
+      },
+      // Reported "busy" while a turn is active or input has closed; the
+      // scheduler presumably defers firing while this returns true — confirm
+      // in cronScheduler.
+      isLoading: () => running || inputClosed,
+      getJitterConfig: cronJitterConfigModule?.getCronJitterConfig,
+      // Re-evaluated per check so cron can stop if the runtime gate flips
+      // off mid-session (scheduler semantics for "killed" live in the module).
+      isKilled: () => !cronGate?.isKairosCronEnabled(),
+    })
+    cronScheduler.start()
+  }
+
+  const sendControlResponseSuccess = function (
+    message: SDKControlRequest,
+    response?: Record<string, unknown>,
+  ) {
+    output.enqueue({
+      type: 'control_response',
+      response: {
+        subtype: 'success',
+        request_id: message.request_id,
+        response: response,
+      },
+    })
+  }
+
+  const sendControlResponseError = function (
+    message: SDKControlRequest,
+    errorMessage: string,
+  ) {
+    output.enqueue({
+      type: 'control_response',
+      response: {
+        subtype: 'error',
+        request_id: message.request_id,
+        error: errorMessage,
+      },
+    })
+  }
+
+  // Handle unexpected permission responses by looking up the unresolved tool
+  // call in the transcript and executing it
+  // Shared across callback invocations: tool_use ids already routed through
+  // the orphan handler — presumably so a duplicate/late response for the same
+  // tool call isn't processed twice (dedupe semantics live in
+  // handleOrphanedPermissionResponse).
+  const handledOrphanedToolUseIds = new Set<string>()
+  structuredIO.setUnexpectedResponseCallback(async message => {
+    await handleOrphanedPermissionResponse({
+      message,
+      setAppState,
+      handledToolUseIds: handledOrphanedToolUseIds,
+      onEnqueued: () => {
+        // The first message of a session might be the orphaned permission
+        // check rather than a user prompt, so kick off the loop.
+        void run()
+      },
+    })
+  })
+
+  // --- OAuth flow bookkeeping (MCP servers + Anthropic account auth) ---
+  // Track active OAuth flows per server so we can abort a previous flow
+  // when a new mcp_authenticate request arrives for the same server.
+  // Presumably keyed by MCP server name — confirm at the insertion sites.
+  const activeOAuthFlows = new Map<string, AbortController>()
+  // Track manual callback URL submit functions for active OAuth flows.
+  // Used when localhost is not reachable (e.g., browser-based IDEs).
+  const oauthCallbackSubmitters = new Map<
+    string,
+    (callbackUrl: string) => void
+  >()
+  // Track servers where the manual callback was actually invoked (so the
+  // automatic reconnect path knows to skip — the extension will reconnect).
+  const oauthManualCallbackUsed = new Set<string>()
+  // Track OAuth auth-only promises so mcp_oauth_callback_url can await
+  // token exchange completion. Reconnect is handled separately by the
+  // extension via handleAuthDone → mcp_reconnect.
+  const oauthAuthPromises = new Map<string, Promise<void>>()
+
+  // In-flight Anthropic OAuth flow (claude_authenticate). Single-slot: a
+  // second authenticate request cleans up the first. The service holds the
+  // PKCE verifier + localhost listener; the promise settles after
+  // installOAuthTokens — after it resolves, the in-process memoized token
+  // cache is already cleared and the next API call picks up the new creds.
+  let claudeOAuth: {
+    service: OAuthService
+    flow: Promise<void>
+  } | null = null
+
+  // This is essentially spawning a parallel async task- we have two
+  // running in parallel- one reading from stdin and adding to the
+  // queue to be processed and another reading from the queue,
+  // processing and returning the result of the generation.
+  // The process is complete when the input stream completes and
+  // the last generation of the queue has complete.
+  void (async () => {
+    let initialized = false
+    logForDiagnosticsNoPII('info', 'cli_message_loop_started')
+    for await (const message of structuredIO.structuredInput) {
+      // Non-user events are handled inline (no queue). started→completed in
+      // the same tick carries no information, so only fire completed.
+      // control_response is reported by StructuredIO.processLine (which also
+      // sees orphans that never yield here).
+      const eventId = 'uuid' in message ? message.uuid : undefined
+      if (
+        eventId &&
+        message.type !== 'user' &&
+        message.type !== 'control_response'
+      ) {
+        notifyCommandLifecycle(eventId, 'completed')
+      }
+
+      if (message.type === 'control_request') {
+        if (message.request.subtype === 'interrupt') {
+          // Track escapes for attribution (ant-only feature)
+          if (feature('COMMIT_ATTRIBUTION')) {
+            setAppState(prev => ({
+              ...prev,
+              attribution: {
+                ...prev.attribution,
+                escapeCount: prev.attribution.escapeCount + 1,
+              },
+            }))
+          }
+          if (abortController) {
+            abortController.abort()
+          }
+          suggestionState.abortController?.abort()
+          suggestionState.abortController = null
+          suggestionState.lastEmitted = null
+          suggestionState.pendingSuggestion = null
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'end_session') {
+          logForDebugging(
+            `[print.ts] end_session received, reason=${message.request.reason ?? 'unspecified'}`,
+          )
+          if (abortController) {
+            abortController.abort()
+          }
+          suggestionState.abortController?.abort()
+          suggestionState.abortController = null
+          suggestionState.lastEmitted = null
+          suggestionState.pendingSuggestion = null
+          sendControlResponseSuccess(message)
+          break // exits for-await → falls through to inputClosed=true drain below
+        } else if (message.request.subtype === 'initialize') {
+          // SDK MCP server names from the initialize message
+          // Populated by both browser and ProcessTransport sessions
+          if (
+            message.request.sdkMcpServers &&
+            message.request.sdkMcpServers.length > 0
+          ) {
+            for (const serverName of message.request.sdkMcpServers) {
+              // Create placeholder config for SDK MCP servers
+              // The actual server connection is managed by the SDK Query class
+              sdkMcpConfigs[serverName] = {
+                type: 'sdk',
+                name: serverName,
+              }
+            }
+          }
+
+          await handleInitializeRequest(
+            message.request,
+            message.request_id,
+            initialized,
+            output,
+            commands,
+            modelInfos,
+            structuredIO,
+            !!options.enableAuthStatus,
+            options,
+            agents,
+            getAppState,
+          )
+
+          // Enable prompt suggestions in AppState when SDK consumer opts in.
+          // shouldEnablePromptSuggestion() returns false for non-interactive
+          // sessions, but the SDK consumer explicitly requested suggestions.
+          if (message.request.promptSuggestions) {
+            setAppState(prev => {
+              if (prev.promptSuggestionEnabled) return prev
+              return { ...prev, promptSuggestionEnabled: true }
+            })
+          }
+
+          if (
+            message.request.agentProgressSummaries &&
+            getFeatureValue_CACHED_MAY_BE_STALE('tengu_slate_prism', true)
+          ) {
+            setSdkAgentProgressSummariesEnabled(true)
+          }
+
+          initialized = true
+
+          // If the auto-resume logic pre-enqueued a command, drain it now
+          // that initialize has set up systemPrompt, agents, hooks, etc.
+          if (hasCommandsInQueue()) {
+            void run()
+          }
+        } else if (message.request.subtype === 'set_permission_mode') {
+          const m = message.request // for typescript (TODO: use readonly types to avoid this)
+          setAppState(prev => ({
+            ...prev,
+            toolPermissionContext: handleSetPermissionMode(
+              m,
+              message.request_id,
+              prev.toolPermissionContext,
+              output,
+            ),
+            isUltraplanMode: m.ultraplan ?? prev.isUltraplanMode,
+          }))
+          // handleSetPermissionMode sends the control_response; the
+          // notifySessionMetadataChanged that used to follow here is
+          // now fired by onChangeAppState (with externalized mode name).
+        } else if (message.request.subtype === 'set_model') {
+          const requestedModel = message.request.model ?? 'default'
+          const model =
+            requestedModel === 'default'
+              ? getDefaultMainLoopModel()
+              : requestedModel
+          activeUserSpecifiedModel = model
+          setMainLoopModelOverride(model)
+          notifySessionMetadataChanged({ model })
+          injectModelSwitchBreadcrumbs(requestedModel, model)
+
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'set_max_thinking_tokens') {
+          if (message.request.max_thinking_tokens === null) {
+            options.thinkingConfig = undefined
+          } else if (message.request.max_thinking_tokens === 0) {
+            options.thinkingConfig = { type: 'disabled' }
+          } else {
+            options.thinkingConfig = {
+              type: 'enabled',
+              budgetTokens: message.request.max_thinking_tokens,
+            }
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'mcp_status') {
+          sendControlResponseSuccess(message, {
+            mcpServers: buildMcpServerStatuses(),
+          })
+        } else if (message.request.subtype === 'get_context_usage') {
+          try {
+            const appState = getAppState()
+            const data = await collectContextData({
+              messages: mutableMessages,
+              getAppState,
+              options: {
+                mainLoopModel: getMainLoopModel(),
+                tools: buildAllTools(appState),
+                agentDefinitions: appState.agentDefinitions,
+                customSystemPrompt: options.systemPrompt,
+                appendSystemPrompt: options.appendSystemPrompt,
+              },
+            })
+            sendControlResponseSuccess(message, { ...data })
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (message.request.subtype === 'mcp_message') {
+          // Handle MCP notifications from SDK servers
+          const mcpRequest = message.request
+          const sdkClient = sdkClients.find(
+            client => client.name === mcpRequest.server_name,
+          )
+          // Check client exists - dynamically added SDK servers may have
+          // placeholder clients with null client until updateSdkMcp() runs
+          if (
+            sdkClient &&
+            sdkClient.type === 'connected' &&
+            sdkClient.client?.transport?.onmessage
+          ) {
+            sdkClient.client.transport.onmessage(mcpRequest.message)
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'rewind_files') {
+          const appState = getAppState()
+          const result = await handleRewindFiles(
+            message.request.user_message_id as UUID,
+            appState,
+            setAppState,
+            message.request.dry_run ?? false,
+          )
+          if (result.canRewind || message.request.dry_run) {
+            sendControlResponseSuccess(message, result)
+          } else {
+            sendControlResponseError(
+              message,
+              result.error ?? 'Unexpected error',
+            )
+          }
+        } else if (message.request.subtype === 'cancel_async_message') {
+          const targetUuid = message.request.message_uuid
+          const removed = dequeueAllMatching(cmd => cmd.uuid === targetUuid)
+          sendControlResponseSuccess(message, {
+            cancelled: removed.length > 0,
+          })
+        } else if (message.request.subtype === 'seed_read_state') {
+          // Client observed a Read that was later removed from context (e.g.
+          // by snip), so transcript-based seeding missed it. Queued into
+          // pendingSeeds; applied at the next clone-replace boundary.
+          try {
+            // expandPath: all other readFileState writers normalize (~, relative,
+            // session cwd vs process cwd). FileEditTool looks up by expandPath'd
+            // key — a verbatim client path would miss.
+            const normalizedPath = expandPath(message.request.path)
+            // Check disk mtime before reading content. If the file changed
+            // since the client's observation, readFile would return C_current
+            // but we'd store it with the client's M_observed — getChangedFiles
+            // then sees disk > cache.timestamp, re-reads, diffs C_current vs
+            // C_current = empty, emits no attachment, and the model is never
+            // told about the C_observed → C_current change. Skipping the seed
+            // makes Edit fail "file not read yet" → forces a fresh Read.
+            // Math.floor matches FileReadTool and getFileModificationTime.
+            const diskMtime = Math.floor((await stat(normalizedPath)).mtimeMs)
+            if (diskMtime <= message.request.mtime) {
+              const raw = await readFile(normalizedPath, 'utf-8')
+              // Strip BOM + normalize CRLF→LF to match readFileInRange and
+              // readFileSyncWithMetadata. FileEditTool's content-compare
+              // fallback (for Windows mtime bumps without content change)
+              // compares against LF-normalized disk reads.
+              const content = (
+                raw.charCodeAt(0) === 0xfeff ? raw.slice(1) : raw
+              ).replaceAll('\r\n', '\n')
+              pendingSeeds.set(normalizedPath, {
+                content,
+                timestamp: diskMtime,
+                offset: undefined,
+                limit: undefined,
+              })
+            }
+          } catch {
+            // ENOENT etc — skip seeding but still succeed
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'mcp_set_servers') {
+          const { response, sdkServersChanged } = await applyMcpServerChanges(
+            message.request.servers,
+          )
+          sendControlResponseSuccess(message, response)
+
+          // Connect SDK servers AFTER response to avoid deadlock
+          if (sdkServersChanged) {
+            void updateSdkMcp()
+          }
+        } else if (message.request.subtype === 'reload_plugins') {
+          try {
+            if (
+              feature('DOWNLOAD_USER_SETTINGS') &&
+              (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE) || getIsRemoteMode())
+            ) {
+              // Re-pull user settings so enabledPlugins pushed from the
+              // user's local CLI take effect before the cache sweep.
+              const applied = await redownloadUserSettings()
+              if (applied) {
+                settingsChangeDetector.notifyChange('userSettings')
+              }
+            }
+
+            const r = await refreshActivePlugins(setAppState)
+
+            const sdkAgents = currentAgents.filter(
+              a => a.source === 'flagSettings',
+            )
+            currentAgents = [...r.agentDefinitions.allAgents, ...sdkAgents]
+
+            // Reload succeeded — gather response data best-effort so a
+            // read failure doesn't mask the successful state change.
+            // allSettled so one failure doesn't discard the others.
+            let plugins: SDKControlReloadPluginsResponse['plugins'] = []
+            const [cmdsR, mcpR, pluginsR] = await Promise.allSettled([
+              getCommands(cwd()),
+              applyPluginMcpDiff(),
+              loadAllPluginsCacheOnly(),
+            ])
+            if (cmdsR.status === 'fulfilled') {
+              currentCommands = cmdsR.value
+            } else {
+              logError(cmdsR.reason)
+            }
+            if (mcpR.status === 'rejected') {
+              logError(mcpR.reason)
+            }
+            if (pluginsR.status === 'fulfilled') {
+              plugins = pluginsR.value.enabled.map(p => ({
+                name: p.name,
+                path: p.path,
+                source: p.source,
+              }))
+            } else {
+              logError(pluginsR.reason)
+            }
+
+            sendControlResponseSuccess(message, {
+              commands: currentCommands
+                .filter(cmd => cmd.userInvocable !== false)
+                .map(cmd => ({
+                  name: getCommandName(cmd),
+                  description: formatDescriptionWithSource(cmd),
+                  argumentHint: cmd.argumentHint || '',
+                })),
+              agents: currentAgents.map(a => ({
+                name: a.agentType,
+                description: a.whenToUse,
+                model: a.model === 'inherit' ? undefined : a.model,
+              })),
+              plugins,
+              mcpServers: buildMcpServerStatuses(),
+              error_count: r.error_count,
+            } satisfies SDKControlReloadPluginsResponse)
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (message.request.subtype === 'mcp_reconnect') {
+          const currentAppState = getAppState()
+          const { serverName } = message.request
+          elicitationRegistered.delete(serverName)
+          // Config-existence gate must cover the SAME sources as the
+          // operations below. SDK-injected servers (query({mcpServers:{...}}))
+          // and dynamically-added servers were missing here, so
+          // toggleMcpServer/reconnect returned "Server not found" even though
+          // the disconnect/reconnect would have worked (gh-31339 / CC-314).
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            sdkClients.find(c => c.name === serverName)?.config ??
+            dynamicMcpState.clients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else {
+            const result = await reconnectMcpServerImpl(serverName, config)
+            // Update appState.mcp with the new client, tools, commands, and resources
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName ? result.client : c,
+                ),
+                tools: [
+                  ...reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                  ...result.tools,
+                ],
+                commands: [
+                  ...reject(prev.mcp.commands, c =>
+                    commandBelongsToServer(c, serverName),
+                  ),
+                  ...result.commands,
+                ],
+                resources:
+                  result.resources && result.resources.length > 0
+                    ? { ...prev.mcp.resources, [serverName]: result.resources }
+                    : omit(prev.mcp.resources, serverName),
+              },
+            }))
+            // Also update dynamicMcpState so run() picks up the new tools
+            // on the next turn (run() reads dynamicMcpState, not appState)
+            dynamicMcpState = {
+              ...dynamicMcpState,
+              clients: [
+                ...dynamicMcpState.clients.filter(c => c.name !== serverName),
+                result.client,
+              ],
+              tools: [
+                ...dynamicMcpState.tools.filter(
+                  t => !t.name?.startsWith(prefix),
+                ),
+                ...result.tools,
+              ],
+            }
+            if (result.client.type === 'connected') {
+              registerElicitationHandlers([result.client])
+              reregisterChannelHandlerAfterReconnect(result.client)
+              sendControlResponseSuccess(message)
+            } else {
+              const errorMessage =
+                result.client.type === 'failed'
+                  ? (result.client.error ?? 'Connection failed')
+                  : `Server status: ${result.client.type}`
+              sendControlResponseError(message, errorMessage)
+            }
+          }
+        } else if (message.request.subtype === 'mcp_toggle') {
+          const currentAppState = getAppState()
+          const { serverName, enabled } = message.request
+          elicitationRegistered.delete(serverName)
+          // Gate must match the client-lookup spread below (which
+          // includes sdkClients and dynamicMcpState.clients). Same fix as
+          // mcp_reconnect above (gh-31339 / CC-314).
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            sdkClients.find(c => c.name === serverName)?.config ??
+            dynamicMcpState.clients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else if (!enabled) {
+            // Disabling: persist + disconnect (matches TUI toggleMcpServer behavior)
+            setMcpServerEnabled(serverName, false)
+            const client = [
+              ...mcpClients,
+              ...sdkClients,
+              ...dynamicMcpState.clients,
+              ...currentAppState.mcp.clients,
+            ].find(c => c.name === serverName)
+            if (client && client.type === 'connected') {
+              await clearServerCache(serverName, config)
+            }
+            // Update appState.mcp to reflect disabled status and remove tools/commands/resources
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName
+                    ? { name: serverName, type: 'disabled' as const, config }
+                    : c,
+                ),
+                tools: reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                commands: reject(prev.mcp.commands, c =>
+                  commandBelongsToServer(c, serverName),
+                ),
+                resources: omit(prev.mcp.resources, serverName),
+              },
+            }))
+            sendControlResponseSuccess(message)
+          } else {
+            // Enabling: persist + reconnect
+            setMcpServerEnabled(serverName, true)
+            const result = await reconnectMcpServerImpl(serverName, config)
+            // Update appState.mcp with the new client, tools, commands, and resources
+            // This ensures the LLM sees updated tools after enabling the server
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName ? result.client : c,
+                ),
+                tools: [
+                  ...reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                  ...result.tools,
+                ],
+                commands: [
+                  ...reject(prev.mcp.commands, c =>
+                    commandBelongsToServer(c, serverName),
+                  ),
+                  ...result.commands,
+                ],
+                resources:
+                  result.resources && result.resources.length > 0
+                    ? { ...prev.mcp.resources, [serverName]: result.resources }
+                    : omit(prev.mcp.resources, serverName),
+              },
+            }))
+            if (result.client.type === 'connected') {
+              registerElicitationHandlers([result.client])
+              reregisterChannelHandlerAfterReconnect(result.client)
+              sendControlResponseSuccess(message)
+            } else {
+              const errorMessage =
+                result.client.type === 'failed'
+                  ? (result.client.error ?? 'Connection failed')
+                  : `Server status: ${result.client.type}`
+              sendControlResponseError(message, errorMessage)
+            }
+          }
+        } else if (message.request.subtype === 'channel_enable') {
+          const currentAppState = getAppState()
+          handleChannelEnable(
+            message.request_id,
+            message.request.serverName,
+            // Pool spread matches mcp_status — all three client sources.
+            [
+              ...currentAppState.mcp.clients,
+              ...sdkClients,
+              ...dynamicMcpState.clients,
+            ],
+            output,
+          )
+        } else if (message.request.subtype === 'mcp_authenticate') {
+          const { serverName } = message.request
+          const currentAppState = getAppState()
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else if (config.type !== 'sse' && config.type !== 'http') {
+            sendControlResponseError(
+              message,
+              `Server type "${config.type}" does not support OAuth authentication`,
+            )
+          } else {
+            try {
+              // Abort any previous in-flight OAuth flow for this server
+              activeOAuthFlows.get(serverName)?.abort()
+              const controller = new AbortController()
+              activeOAuthFlows.set(serverName, controller)
+
+              // Capture the auth URL from the callback
+              let resolveAuthUrl: (url: string) => void
+              const authUrlPromise = new Promise<string>(resolve => {
+                resolveAuthUrl = resolve
+              })
+
+              // Start the OAuth flow in the background
+              const oauthPromise = performMCPOAuthFlow(
+                serverName,
+                config,
+                url => resolveAuthUrl!(url),
+                controller.signal,
+                {
+                  skipBrowserOpen: true,
+                  onWaitingForCallback: submit => {
+                    oauthCallbackSubmitters.set(serverName, submit)
+                  },
+                },
+              )
+
+              // Wait for the auth URL (or the flow to complete without needing redirect)
+              const authUrl = await Promise.race([
+                authUrlPromise,
+                oauthPromise.then(() => null as string | null),
+              ])
+
+              if (authUrl) {
+                sendControlResponseSuccess(message, {
+                  authUrl,
+                  requiresUserAction: true,
+                })
+              } else {
+                sendControlResponseSuccess(message, {
+                  requiresUserAction: false,
+                })
+              }
+
+              // Store auth-only promise for mcp_oauth_callback_url handler.
+              // Don't swallow errors — the callback handler needs to detect
+              // auth failures and report them to the caller.
+              oauthAuthPromises.set(serverName, oauthPromise)
+
+              // Handle background completion — reconnect after auth.
+              // When manual callback is used, skip the reconnect here;
+              // the extension's handleAuthDone → mcp_reconnect handles it
+              // (which also updates dynamicMcpState for tool registration).
+              const fullFlowPromise = oauthPromise
+                .then(async () => {
+                  // Don't reconnect if the server was disabled during the OAuth flow
+                  if (isMcpServerDisabled(serverName)) {
+                    return
+                  }
+                  // Skip reconnect if the manual callback path was used —
+                  // handleAuthDone will do it via mcp_reconnect (which
+                  // updates dynamicMcpState for tool registration).
+                  if (oauthManualCallbackUsed.has(serverName)) {
+                    return
+                  }
+                  // Reconnect the server after successful auth
+                  const result = await reconnectMcpServerImpl(
+                    serverName,
+                    config,
+                  )
+                  const prefix = getMcpPrefix(serverName)
+                  setAppState(prev => ({
+                    ...prev,
+                    mcp: {
+                      ...prev.mcp,
+                      clients: prev.mcp.clients.map(c =>
+                        c.name === serverName ? result.client : c,
+                      ),
+                      tools: [
+                        ...reject(prev.mcp.tools, t =>
+                          t.name?.startsWith(prefix),
+                        ),
+                        ...result.tools,
+                      ],
+                      commands: [
+                        ...reject(prev.mcp.commands, c =>
+                          commandBelongsToServer(c, serverName),
+                        ),
+                        ...result.commands,
+                      ],
+                      resources:
+                        result.resources && result.resources.length > 0
+                          ? {
+                              ...prev.mcp.resources,
+                              [serverName]: result.resources,
+                            }
+                          : omit(prev.mcp.resources, serverName),
+                    },
+                  }))
+                  // Also update dynamicMcpState so run() picks up the new tools
+                  // on the next turn (run() reads dynamicMcpState, not appState)
+                  dynamicMcpState = {
+                    ...dynamicMcpState,
+                    clients: [
+                      ...dynamicMcpState.clients.filter(
+                        c => c.name !== serverName,
+                      ),
+                      result.client,
+                    ],
+                    tools: [
+                      ...dynamicMcpState.tools.filter(
+                        t => !t.name?.startsWith(prefix),
+                      ),
+                      ...result.tools,
+                    ],
+                  }
+                })
+                .catch(error => {
+                  logForDebugging(
+                    `MCP OAuth failed for ${serverName}: ${error}`,
+                    { level: 'error' },
+                  )
+                })
+                .finally(() => {
+                  // Clean up only if this is still the active flow
+                  if (activeOAuthFlows.get(serverName) === controller) {
+                    activeOAuthFlows.delete(serverName)
+                    oauthCallbackSubmitters.delete(serverName)
+                    oauthManualCallbackUsed.delete(serverName)
+                    oauthAuthPromises.delete(serverName)
+                  }
+                })
+              void fullFlowPromise
+            } catch (error) {
+              sendControlResponseError(message, errorMessage(error))
+            }
+          }
+        } else if (message.request.subtype === 'mcp_oauth_callback_url') {
+          const { serverName, callbackUrl } = message.request
+          const submit = oauthCallbackSubmitters.get(serverName)
+          if (submit) {
+            // Validate the callback URL before submitting. The submit
+            // callback in auth.ts silently ignores URLs missing a code
+            // param, which would leave the auth promise unresolved and
+            // block the control message loop until timeout.
+            let hasCodeOrError = false
+            try {
+              const parsed = new URL(callbackUrl)
+              hasCodeOrError =
+                parsed.searchParams.has('code') ||
+                parsed.searchParams.has('error')
+            } catch {
+              // Invalid URL
+            }
+            if (!hasCodeOrError) {
+              sendControlResponseError(
+                message,
+                'Invalid callback URL: missing authorization code. Please paste the full redirect URL including the code parameter.',
+              )
+            } else {
+              oauthManualCallbackUsed.add(serverName)
+              submit(callbackUrl)
+              // Wait for auth (token exchange) to complete before responding.
+              // Reconnect is handled by the extension via handleAuthDone →
+              // mcp_reconnect (which updates dynamicMcpState for tools).
+              const authPromise = oauthAuthPromises.get(serverName)
+              if (authPromise) {
+                try {
+                  await authPromise
+                  sendControlResponseSuccess(message)
+                } catch (error) {
+                  sendControlResponseError(
+                    message,
+                    error instanceof Error
+                      ? error.message
+                      : 'OAuth authentication failed',
+                  )
+                }
+              } else {
+                sendControlResponseSuccess(message)
+              }
+            }
+          } else {
+            sendControlResponseError(
+              message,
+              `No active OAuth flow for server: ${serverName}`,
+            )
+          }
+        } else if (message.request.subtype === 'claude_authenticate') {
+          // Anthropic OAuth over the control channel. The SDK client owns
+          // the user's browser (we're headless in -p mode); we hand back
+          // both URLs and wait. Automatic URL → localhost listener catches
+          // the redirect if the browser is on this host; manual URL → the
+          // success page shows "code#state" for claude_oauth_callback.
+          const { loginWithClaudeAi } = message.request
+
+          // Clean up any prior flow. cleanup() closes the localhost listener
+          // and nulls the manual resolver. The prior `flow` promise is left
+          // pending (AuthCodeListener.close() does not reject) but its object
+          // graph becomes unreachable once the server handle is released and
+          // is GC'd — no fd or port is held.
+          claudeOAuth?.service.cleanup()
+
+          logEvent('tengu_oauth_flow_start', {
+            loginWithClaudeAi: loginWithClaudeAi ?? true,
+          })
+
+          const service = new OAuthService()
+          let urlResolver!: (urls: {
+            manualUrl: string
+            automaticUrl: string
+          }) => void
+          const urlPromise = new Promise<{
+            manualUrl: string
+            automaticUrl: string
+          }>(resolve => {
+            urlResolver = resolve
+          })
+
+          const flow = service
+            .startOAuthFlow(
+              async (manualUrl, automaticUrl) => {
+                // automaticUrl is always defined when skipBrowserOpen is set;
+                // the signature is optional only for the existing single-arg callers.
+                urlResolver({ manualUrl, automaticUrl: automaticUrl! })
+              },
+              {
+                loginWithClaudeAi: loginWithClaudeAi ?? true,
+                skipBrowserOpen: true,
+              },
+            )
+            .then(async tokens => {
+              // installOAuthTokens: performLogout (clear stale state) →
+              // store profile → saveOAuthTokensIfNeeded → clearOAuthTokenCache
+              // → clearAuthRelatedCaches. After this resolves, the memoized
+              // getClaudeAIOAuthTokens in this process is invalidated; the
+              // next API call re-reads keychain/file and works. No respawn.
+              await installOAuthTokens(tokens)
+              logEvent('tengu_oauth_success', {
+                loginWithClaudeAi: loginWithClaudeAi ?? true,
+              })
+            })
+            .finally(() => {
+              service.cleanup()
+              if (claudeOAuth?.service === service) {
+                claudeOAuth = null
+              }
+            })
+
+          claudeOAuth = { service, flow }
+
+          // Attach the rejection handler before awaiting so a synchronous
+          // startOAuthFlow failure doesn't surface as an unhandled rejection.
+          // The claude_oauth_callback handler re-awaits flow for the manual
+          // path and surfaces the real error to the client.
+          void flow.catch(err =>
+            logForDebugging(`claude_authenticate flow ended: ${err}`, {
+              level: 'info',
+            }),
+          )
+
+          try {
+            // Race against flow: if startOAuthFlow rejects before calling
+            // the authURLHandler (e.g. AuthCodeListener.start() fails with
+            // EACCES or fd exhaustion), urlPromise would pend forever and
+            // wedge the stdin loop. flow resolving first is unreachable in
+            // practice (it's suspended on the same urls we're waiting for).
+            const { manualUrl, automaticUrl } = await Promise.race([
+              urlPromise,
+              flow.then(() => {
+                throw new Error(
+                  'OAuth flow completed without producing auth URLs',
+                )
+              }),
+            ])
+            sendControlResponseSuccess(message, {
+              manualUrl,
+              automaticUrl,
+            })
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (
+          message.request.subtype === 'claude_oauth_callback' ||
+          message.request.subtype === 'claude_oauth_wait_for_completion'
+        ) {
+          if (!claudeOAuth) {
+            sendControlResponseError(
+              message,
+              'No active claude_authenticate flow',
+            )
+          } else {
+            // Inject the manual code synchronously — must happen in stdin
+            // message order so a subsequent claude_authenticate doesn't
+            // replace the service before this code lands.
+            if (message.request.subtype === 'claude_oauth_callback') {
+              claudeOAuth.service.handleManualAuthCodeInput({
+                authorizationCode: message.request.authorizationCode,
+                state: message.request.state,
+              })
+            }
+            // Detach the await — the stdin reader is serial and blocking
+            // here deadlocks claude_oauth_wait_for_completion: flow may
+            // only resolve via a future claude_oauth_callback on stdin,
+            // which can't be read while we're parked. Capture the binding;
+            // claudeOAuth is nulled in flow's own .finally.
+            const { flow } = claudeOAuth
+            void flow.then(
+              () => {
+                const accountInfo = getAccountInformation()
+                sendControlResponseSuccess(message, {
+                  account: {
+                    email: accountInfo?.email,
+                    organization: accountInfo?.organization,
+                    subscriptionType: accountInfo?.subscription,
+                    tokenSource: accountInfo?.tokenSource,
+                    apiKeySource: accountInfo?.apiKeySource,
+                    apiProvider: getAPIProvider(),
+                  },
+                })
+              },
+              (error: unknown) =>
+                sendControlResponseError(message, errorMessage(error)),
+            )
+          }
+        } else if (message.request.subtype === 'mcp_clear_auth') {
+          const { serverName } = message.request
+          const currentAppState = getAppState()
+          const config =
+            getMcpConfigByName(serverName) ??
+            mcpClients.find(c => c.name === serverName)?.config ??
+            currentAppState.mcp.clients.find(c => c.name === serverName)
+              ?.config ??
+            null
+          if (!config) {
+            sendControlResponseError(message, `Server not found: ${serverName}`)
+          } else if (config.type !== 'sse' && config.type !== 'http') {
+            sendControlResponseError(
+              message,
+              `Cannot clear auth for server type "${config.type}"`,
+            )
+          } else {
+            await revokeServerTokens(serverName, config)
+            const result = await reconnectMcpServerImpl(serverName, config)
+            const prefix = getMcpPrefix(serverName)
+            setAppState(prev => ({
+              ...prev,
+              mcp: {
+                ...prev.mcp,
+                clients: prev.mcp.clients.map(c =>
+                  c.name === serverName ? result.client : c,
+                ),
+                tools: [
+                  ...reject(prev.mcp.tools, t => t.name?.startsWith(prefix)),
+                  ...result.tools,
+                ],
+                commands: [
+                  ...reject(prev.mcp.commands, c =>
+                    commandBelongsToServer(c, serverName),
+                  ),
+                  ...result.commands,
+                ],
+                resources:
+                  result.resources && result.resources.length > 0
+                    ? {
+                        ...prev.mcp.resources,
+                        [serverName]: result.resources,
+                      }
+                    : omit(prev.mcp.resources, serverName),
+              },
+            }))
+            sendControlResponseSuccess(message, {})
+          }
+        } else if (message.request.subtype === 'apply_flag_settings') {
+          // Snapshot the current model before applying — we need to detect
+          // model switches so we can inject breadcrumbs and notify listeners.
+          const prevModel = getMainLoopModel()
+
+          // Merge the provided settings into the in-memory flag settings
+          const existing = getFlagSettingsInline() ?? {}
+          const incoming = message.request.settings
+          // Shallow-merge top-level keys; getSettingsForSource handles
+          // the deep merge with file-based flag settings via mergeWith.
+          // JSON serialization drops `undefined`, so callers use `null`
+          // to signal "clear this key". Convert nulls to deletions so
+          // SettingsSchema().safeParse() doesn't reject the whole object
+          // (z.string().optional() accepts string | undefined, not null).
+          const merged = { ...existing, ...incoming }
+          for (const key of Object.keys(merged)) {
+            if (merged[key as keyof typeof merged] === null) {
+              delete merged[key as keyof typeof merged]
+            }
+          }
+          setFlagSettingsInline(merged)
+          // Route through notifyChange so fanOut() resets the settings cache
+          // before listeners run. The subscriber at :392 calls
+          // applySettingsChange for us. Pre-#20625 this was a direct
+          // applySettingsChange() call that relied on its own internal reset —
+          // now that the reset is centralized in fanOut, a direct call here
+          // would read stale cached settings and silently drop the update.
+          // Bonus: going through notifyChange also tells the other subscribers
+          // (loadPluginHooks, sandbox-adapter) about the change, which the
+          // previous direct call skipped.
+          settingsChangeDetector.notifyChange('flagSettings')
+
+          // If the incoming settings include a model change, update the
+          // override so getMainLoopModel() reflects it. The override has
+          // higher priority than the settings cascade in
+          // getUserSpecifiedModelSetting(), so without this update,
+          // getMainLoopModel() returns the stale override and the model
+          // change is silently ignored (matching set_model at :2811).
+          if ('model' in incoming) {
+            if (incoming.model != null) {
+              setMainLoopModelOverride(String(incoming.model))
+            } else {
+              setMainLoopModelOverride(undefined)
+            }
+          }
+
+          // If the model changed, inject breadcrumbs so the model sees the
+          // mid-conversation switch, and notify metadata listeners (CCR).
+          const newModel = getMainLoopModel()
+          if (newModel !== prevModel) {
+            activeUserSpecifiedModel = newModel
+            const modelArg = incoming.model ? String(incoming.model) : 'default'
+            notifySessionMetadataChanged({ model: newModel })
+            injectModelSwitchBreadcrumbs(modelArg, newModel)
+          }
+
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'get_settings') {
+          const currentAppState = getAppState()
+          const model = getMainLoopModel()
+          // modelSupportsEffort gate matches claude.ts — applied.effort must
+          // mirror what actually goes to the API, not just what's configured.
+          const effort = modelSupportsEffort(model)
+            ? resolveAppliedEffort(model, currentAppState.effortValue)
+            : undefined
+          sendControlResponseSuccess(message, {
+            ...getSettingsWithSources(),
+            applied: {
+              model,
+              // Numeric effort (ant-only) → null; SDK schema is string-level only.
+              effort: typeof effort === 'string' ? effort : null,
+            },
+          })
+        } else if (message.request.subtype === 'stop_task') {
+          const { task_id: taskId } = message.request
+          try {
+            await stopTask(taskId, {
+              getAppState,
+              setAppState,
+            })
+            sendControlResponseSuccess(message, {})
+          } catch (error) {
+            sendControlResponseError(message, errorMessage(error))
+          }
+        } else if (message.request.subtype === 'generate_session_title') {
+          // Fire-and-forget so the Haiku call does not block the stdin loop
+          // (which would delay processing of subsequent user messages /
+          // interrupts for the duration of the API roundtrip).
+          const { description, persist } = message.request
+          // Reuse the live controller only if it has not already been aborted
+          // (e.g. by interrupt()); an aborted signal would cause queryHaiku to
+          // immediately throw APIUserAbortError → {title: null}.
+          const titleSignal = (
+            abortController && !abortController.signal.aborted
+              ? abortController
+              : createAbortController()
+          ).signal
+          void (async () => {
+            try {
+              const title = await generateSessionTitle(description, titleSignal)
+              if (title && persist) {
+                try {
+                  saveAiGeneratedTitle(getSessionId() as UUID, title)
+                } catch (e) {
+                  logError(e)
+                }
+              }
+              sendControlResponseSuccess(message, { title })
+            } catch (e) {
+              // Unreachable in practice — generateSessionTitle wraps its
+              // own body and returns null, saveAiGeneratedTitle is wrapped
+              // above. Propagate (not swallow) so unexpected failures are
+              // visible to the SDK caller (hostComms.ts catches and logs).
+              sendControlResponseError(message, errorMessage(e))
+            }
+          })()
+        } else if (message.request.subtype === 'side_question') {
+          // Same fire-and-forget pattern as generate_session_title above —
+          // the forked agent's API roundtrip must not block the stdin loop.
+          //
+          // The snapshot captured by stopHooks (for querySource === 'sdk')
+          // holds the exact systemPrompt/userContext/systemContext/messages
+          // sent on the last main-thread turn. Reusing them gives a byte-
+          // identical prefix → prompt cache hit.
+          //
+          // Fallback (resume before first turn completes — no snapshot yet):
+          // rebuild from scratch. buildSideQuestionFallbackParams mirrors
+          // QueryEngine.ts:ask()'s system prompt assembly (including
+          // --system-prompt / --append-system-prompt) so the rebuilt prefix
+          // matches in the common case. May still miss the cache for
+          // coordinator mode or memory-mechanics extras — acceptable, the
+          // alternative is the side question failing entirely.
+          const { question } = message.request
+          void (async () => {
+            try {
+              const saved = getLastCacheSafeParams()
+              const cacheSafeParams = saved
+                ? {
+                    ...saved,
+                    // If the last turn was interrupted, the snapshot holds an
+                    // already-aborted controller; createChildAbortController in
+                    // createSubagentContext would propagate it and the fork
+                    // would die before sending a request. The controller is
+                    // not part of the cache key — swapping in a fresh one is
+                    // safe. Same guard as generate_session_title above.
+                    toolUseContext: {
+                      ...saved.toolUseContext,
+                      abortController: createAbortController(),
+                    },
+                  }
+                : await buildSideQuestionFallbackParams({
+                    tools: buildAllTools(getAppState()),
+                    commands: currentCommands,
+                    mcpClients: [
+                      ...getAppState().mcp.clients,
+                      ...sdkClients,
+                      ...dynamicMcpState.clients,
+                    ],
+                    messages: mutableMessages,
+                    readFileState,
+                    getAppState,
+                    setAppState,
+                    customSystemPrompt: options.systemPrompt,
+                    appendSystemPrompt: options.appendSystemPrompt,
+                    thinkingConfig: options.thinkingConfig,
+                    agents: currentAgents,
+                  })
+              const result = await runSideQuestion({
+                question,
+                cacheSafeParams,
+              })
+              sendControlResponseSuccess(message, { response: result.response })
+            } catch (e) {
+              sendControlResponseError(message, errorMessage(e))
+            }
+          })()
+        } else if (
+          (feature('PROACTIVE') || feature('KAIROS')) &&
+          (message.request as { subtype: string }).subtype === 'set_proactive'
+        ) {
+          const req = message.request as unknown as {
+            subtype: string
+            enabled: boolean
+          }
+          if (req.enabled) {
+            if (!proactiveModule!.isProactiveActive()) {
+              proactiveModule!.activateProactive('command')
+              scheduleProactiveTick!()
+            }
+          } else {
+            proactiveModule!.deactivateProactive()
+          }
+          sendControlResponseSuccess(message)
+        } else if (message.request.subtype === 'remote_control') {
+          if (message.request.enabled) {
+            if (bridgeHandle) {
+              // Already connected
+              sendControlResponseSuccess(message, {
+                session_url: getRemoteSessionUrl(
+                  bridgeHandle.bridgeSessionId,
+                  bridgeHandle.sessionIngressUrl,
+                ),
+                connect_url: buildBridgeConnectUrl(
+                  bridgeHandle.environmentId,
+                  bridgeHandle.sessionIngressUrl,
+                ),
+                environment_id: bridgeHandle.environmentId,
+              })
+            } else {
+              // initReplBridge surfaces gate-failure reasons via
+              // onStateChange('failed', detail) before returning null.
+              // Capture so the control-response error is actionable
+              // ("/login", "disabled by your organization's policy", etc.)
+              // instead of a generic "initialization failed".
+              let bridgeFailureDetail: string | undefined
+              try {
+                const { initReplBridge } = await import(
+                  'src/bridge/initReplBridge.js'
+                )
+                const handle = await initReplBridge({
+                  onInboundMessage(msg) {
+                    const fields = extractInboundMessageFields(msg)
+                    if (!fields) return
+                    const { content, uuid } = fields
+                    enqueue({
+                      value: content,
+                      mode: 'prompt' as const,
+                      uuid,
+                      skipSlashCommands: true,
+                    })
+                    void run()
+                  },
+                  onPermissionResponse(response) {
+                    // Forward bridge permission responses into the
+                    // stdin processing loop so they resolve pending
+                    // permission requests from the SDK consumer.
+                    structuredIO.injectControlResponse(response)
+                  },
+                  onInterrupt() {
+                    abortController?.abort()
+                  },
+                  onSetModel(model) {
+                    const resolved =
+                      model === 'default' ? getDefaultMainLoopModel() : model
+                    activeUserSpecifiedModel = resolved
+                    setMainLoopModelOverride(resolved)
+                  },
+                  onSetMaxThinkingTokens(maxTokens) {
+                    if (maxTokens === null) {
+                      options.thinkingConfig = undefined
+                    } else if (maxTokens === 0) {
+                      options.thinkingConfig = { type: 'disabled' }
+                    } else {
+                      options.thinkingConfig = {
+                        type: 'enabled',
+                        budgetTokens: maxTokens,
+                      }
+                    }
+                  },
+                  onStateChange(state, detail) {
+                    if (state === 'failed') {
+                      bridgeFailureDetail = detail
+                    }
+                    logForDebugging(
+                      `[bridge:sdk] State change: ${state}${detail ? ` — ${detail}` : ''}`,
+                    )
+                    output.enqueue({
+                      type: 'system' as StdoutMessage['type'],
+                      subtype: 'bridge_state' as string,
+                      state,
+                      detail,
+                      uuid: randomUUID(),
+                      session_id: getSessionId(),
+                    } as StdoutMessage)
+                  },
+                  initialMessages:
+                    mutableMessages.length > 0 ? mutableMessages : undefined,
+                })
+                if (!handle) {
+                  sendControlResponseError(
+                    message,
+                    bridgeFailureDetail ??
+                      'Remote Control initialization failed',
+                  )
+                } else {
+                  bridgeHandle = handle
+                  bridgeLastForwardedIndex = mutableMessages.length
+                  // Forward permission requests to the bridge
+                  structuredIO.setOnControlRequestSent(request => {
+                    handle.sendControlRequest(request)
+                  })
+                  // Cancel stale bridge permission prompts when the SDK
+                  // consumer resolves a can_use_tool request first.
+                  structuredIO.setOnControlRequestResolved(requestId => {
+                    handle.sendControlCancelRequest(requestId)
+                  })
+                  sendControlResponseSuccess(message, {
+                    session_url: getRemoteSessionUrl(
+                      handle.bridgeSessionId,
+                      handle.sessionIngressUrl,
+                    ),
+                    connect_url: buildBridgeConnectUrl(
+                      handle.environmentId,
+                      handle.sessionIngressUrl,
+                    ),
+                    environment_id: handle.environmentId,
+                  })
+                }
+              } catch (err) {
+                sendControlResponseError(message, errorMessage(err))
+              }
+            }
+          } else {
+            // Disable
+            if (bridgeHandle) {
+              structuredIO.setOnControlRequestSent(undefined)
+              structuredIO.setOnControlRequestResolved(undefined)
+              await bridgeHandle.teardown()
+              bridgeHandle = null
+            }
+            sendControlResponseSuccess(message)
+          }
+        } else {
+          // Unknown control request subtype — send an error response so
+          // the caller doesn't hang waiting for a reply that never comes.
+          sendControlResponseError(
+            message,
+            `Unsupported control request subtype: ${(message.request as { subtype: string }).subtype}`,
+          )
+        }
+        continue
+      } else if (message.type === 'control_response') {
+        // Replay control_response messages when replay mode is enabled
+        if (options.replayUserMessages) {
+          output.enqueue(message)
+        }
+        continue
+      } else if (message.type === 'keep_alive') {
+        // Silently ignore keep-alive messages
+        continue
+      } else if (message.type === 'update_environment_variables') {
+        // Handled in structuredIO.ts, but TypeScript needs the type guard
+        continue
+      } else if (message.type === 'assistant' || message.type === 'system') {
+        // History replay from bridge: inject into mutableMessages as
+        // conversation context so the model sees prior turns.
+        const internalMsgs = toInternalMessages([message])
+        mutableMessages.push(...internalMsgs)
+        // Echo assistant messages back so CCR displays them
+        if (message.type === 'assistant' && options.replayUserMessages) {
+          output.enqueue(message)
+        }
+        continue
+      }
+      // After handling control, keep-alive, env-var, assistant, and system
+      // messages above, only user messages should remain.
+      if (message.type !== 'user') {
+        continue
+      }
+
+      // First prompt message implicitly initializes if not already done.
+      initialized = true
+
+      // Check for duplicate user message - skip if already processed
+      if (message.uuid) {
+        const sessionId = getSessionId() as UUID
+        const existsInSession = await doesMessageExistInSession(
+          sessionId,
+          message.uuid,
+        )
+
+        // Check both historical duplicates (from file) and runtime duplicates (this session)
+        if (existsInSession || receivedMessageUuids.has(message.uuid)) {
+          logForDebugging(`Skipping duplicate user message: ${message.uuid}`)
+          // Send acknowledgment for duplicate message if replay mode is enabled
+          if (options.replayUserMessages) {
+            logForDebugging(
+              `Sending acknowledgment for duplicate user message: ${message.uuid}`,
+            )
+            output.enqueue({
+              type: 'user',
+              message: message.message,
+              session_id: sessionId,
+              parent_tool_use_id: null,
+              uuid: message.uuid,
+              timestamp: message.timestamp,
+              isReplay: true,
+            } as SDKUserMessageReplay)
+          }
+          // Historical dup = transcript already has this turn's output, so it
+          // ran but its lifecycle was never closed (interrupted before ack).
+          // Runtime dups don't need this — the original enqueue path closes them.
+          if (existsInSession) {
+            notifyCommandLifecycle(message.uuid, 'completed')
+          }
+          // Don't enqueue duplicate messages for execution
+          continue
+        }
+
+        // Track this UUID to prevent runtime duplicates
+        trackReceivedMessageUuid(message.uuid)
+      }
+
+      enqueue({
+        mode: 'prompt' as const,
+        // file_attachments rides the protobuf catchall from the web composer.
+        // Same-ref no-op when absent (no 'file_attachments' key).
+        value: await resolveAndPrepend(message, message.message.content),
+        uuid: message.uuid,
+        priority: message.priority,
+      })
+      // Increment prompt count for attribution tracking and save snapshot
+      // The snapshot persists promptCount so it survives compaction
+      if (feature('COMMIT_ATTRIBUTION')) {
+        setAppState(prev => ({
+          ...prev,
+          attribution: incrementPromptCount(prev.attribution, snapshot => {
+            void recordAttributionSnapshot(snapshot).catch(error => {
+              logForDebugging(`Attribution: Failed to save snapshot: ${error}`)
+            })
+          }),
+        }))
+      }
+      void run()
+    }
+    inputClosed = true
+    cronScheduler?.stop()
+    if (!running) {
+      // If a push-suggestion is in-flight, wait for it to emit before closing
+      // the output stream (5 s safety timeout to prevent hanging).
+      if (suggestionState.inflightPromise) {
+        await Promise.race([suggestionState.inflightPromise, sleep(5000)])
+      }
+      suggestionState.abortController?.abort()
+      suggestionState.abortController = null
+      await finalizePendingAsyncHooks()
+      unsubscribeSkillChanges()
+      unsubscribeAuthStatus?.()
+      statusListeners.delete(rateLimitListener)
+      output.done()
+    }
+  })()
+
+  return output
+}
+
+/**
+ * Creates a CanUseToolFn that incorporates a custom permission prompt tool.
+ * This function converts the permissionPromptTool into a CanUseToolFn that can be used in ask.tsx
+ */
+export function createCanUseToolWithPermissionPrompt(
+  permissionPromptTool: PermissionPromptTool,
+): CanUseToolFn {
+  const canUseTool: CanUseToolFn = async (
+    tool,
+    input,
+    toolUseContext,
+    assistantMessage,
+    toolUseId,
+    forceDecision,
+  ) => {
+    const mainPermissionResult =
+      forceDecision ??
+      (await hasPermissionsToUseTool(
+        tool,
+        input,
+        toolUseContext,
+        assistantMessage,
+        toolUseId,
+      ))
+
+    // If the tool is allowed or denied, return the result
+    if (
+      mainPermissionResult.behavior === 'allow' ||
+      mainPermissionResult.behavior === 'deny'
+    ) {
+      return mainPermissionResult
+    }
+
+    // Race the permission prompt tool against the abort signal.
+    //
+    // Why we need this: The permission prompt tool may block indefinitely waiting
+    // for user input (e.g., via stdin or a UI dialog). If the user triggers an
+    // interrupt (Ctrl+C), we need to detect it even while the tool is blocked.
+    // Without this race, the abort check would only run AFTER the tool completes,
+    // which may never happen if the tool is waiting for input that will never come.
+    //
+    // The second check (combinedSignal.aborted) handles a race condition where
+    // abort fires after Promise.race resolves but before we reach this check.
+    const { signal: combinedSignal, cleanup: cleanupAbortListener } =
+      createCombinedAbortSignal(toolUseContext.abortController.signal)
+
+    // Check if already aborted before starting the race
+    if (combinedSignal.aborted) {
+      cleanupAbortListener()
+      return {
+        behavior: 'deny',
+        message: 'Permission prompt was aborted.',
+        decisionReason: {
+          type: 'permissionPromptTool' as const,
+          permissionPromptToolName: tool.name,
+          toolResult: undefined,
+        },
+      }
+    }
+
+    const abortPromise = new Promise<'aborted'>(resolve => {
+      combinedSignal.addEventListener('abort', () => resolve('aborted'), {
+        once: true,
+      })
+    })
+
+    const toolCallPromise = permissionPromptTool.call(
+      {
+        tool_name: tool.name,
+        input,
+        tool_use_id: toolUseId,
+      },
+      toolUseContext,
+      canUseTool,
+      assistantMessage,
+    )
+
+    const raceResult = await Promise.race([toolCallPromise, abortPromise])
+    cleanupAbortListener()
+
+    if (raceResult === 'aborted' || combinedSignal.aborted) {
+      return {
+        behavior: 'deny',
+        message: 'Permission prompt was aborted.',
+        decisionReason: {
+          type: 'permissionPromptTool' as const,
+          permissionPromptToolName: tool.name,
+          toolResult: undefined,
+        },
+      }
+    }
+
+    // TypeScript narrowing: after the abort check, raceResult must be ToolResult
+    const result = raceResult as Awaited<typeof toolCallPromise>
+
+    const permissionToolResultBlockParam =
+      permissionPromptTool.mapToolResultToToolResultBlockParam(result.data, '1')
+    if (
+      !permissionToolResultBlockParam.content ||
+      !Array.isArray(permissionToolResultBlockParam.content) ||
+      !permissionToolResultBlockParam.content[0] ||
+      permissionToolResultBlockParam.content[0].type !== 'text' ||
+      typeof permissionToolResultBlockParam.content[0].text !== 'string'
+    ) {
+      throw new Error(
+        'Permission prompt tool returned an invalid result. Expected a single text block param with type="text" and a string text value.',
+      )
+    }
+    return permissionPromptToolResultToPermissionDecision(
+      permissionToolOutputSchema().parse(
+        safeParseJSON(permissionToolResultBlockParam.content[0].text),
+      ),
+      permissionPromptTool,
+      input,
+      toolUseContext,
+    )
+  }
+  return canUseTool
+}
+
+// Exported for testing — regression: this used to crash at construction when
+// getMcpTools() was empty (before per-server connects populated appState).
+export function getCanUseToolFn(
+  permissionPromptToolName: string | undefined,
+  structuredIO: StructuredIO,
+  getMcpTools: () => Tool[],
+  onPermissionPrompt?: (details: RequiresActionDetails) => void,
+): CanUseToolFn {
+  if (permissionPromptToolName === 'stdio') {
+    return structuredIO.createCanUseTool(onPermissionPrompt)
+  }
+  if (!permissionPromptToolName) {
+    return async (
+      tool,
+      input,
+      toolUseContext,
+      assistantMessage,
+      toolUseId,
+      forceDecision,
+    ) =>
+      forceDecision ??
+      (await hasPermissionsToUseTool(
+        tool,
+        input,
+        toolUseContext,
+        assistantMessage,
+        toolUseId,
+      ))
+  }
+  // Lazy lookup: MCP connects are per-server incremental in print mode, so
+  // the tool may not be in appState yet at init time. Resolve on first call
+  // (first permission prompt), by which point connects have had time to finish.
+  let resolved: CanUseToolFn | null = null
+  return async (
+    tool,
+    input,
+    toolUseContext,
+    assistantMessage,
+    toolUseId,
+    forceDecision,
+  ) => {
+    if (!resolved) {
+      const mcpTools = getMcpTools()
+      const permissionPromptTool = mcpTools.find(t =>
+        toolMatchesName(t, permissionPromptToolName),
+      ) as PermissionPromptTool | undefined
+      if (!permissionPromptTool) {
+        const error = `Error: MCP tool ${permissionPromptToolName} (passed via --permission-prompt-tool) not found. Available MCP tools: ${mcpTools.map(t => t.name).join(', ') || 'none'}`
+        process.stderr.write(`${error}\n`)
+        gracefulShutdownSync(1)
+        throw new Error(error)
+      }
+      if (!permissionPromptTool.inputJSONSchema) {
+        const error = `Error: tool ${permissionPromptToolName} (passed via --permission-prompt-tool) must be an MCP tool`
+        process.stderr.write(`${error}\n`)
+        gracefulShutdownSync(1)
+        throw new Error(error)
+      }
+      resolved = createCanUseToolWithPermissionPrompt(permissionPromptTool)
+    }
+    return resolved(
+      tool,
+      input,
+      toolUseContext,
+      assistantMessage,
+      toolUseId,
+      forceDecision,
+    )
+  }
+}
+
/**
 * Handles the SDK `initialize` control request.
 *
 * Applies init-time configuration carried over stdin (system prompts, prompt
 * suggestions, agents, hooks, JSON schema — sent here rather than via argv to
 * avoid ARG_MAX limits), re-resolves the main thread agent once SDK agents
 * are merged, then replies with a control_response describing the session
 * (commands, agents, output styles, models, account info, pid). A second
 * initialize is rejected with an error response.
 *
 * Side effects: mutates `options` (systemPrompt/appendSystemPrompt/
 * promptSuggestions) and `agents` in place; may set the main thread agent
 * type and main-loop model override; registers hook callbacks; may prepend a
 * user message via structuredIO.
 *
 * @param request - Initialize request payload from the SDK consumer.
 * @param requestId - Control-protocol request id echoed in the response.
 * @param initialized - Whether initialization already happened.
 * @param output - Stdout message stream the response is enqueued on.
 * @param commands - Slash commands to advertise (non-user-invocable filtered out).
 * @param modelInfos - Models to advertise in the response.
 * @param structuredIO - Structured stdio bridge (hook callbacks, pending
 *   permission requests, prepended user messages).
 * @param enableAuthStatus - When true, emits the current auth status after the
 *   init response.
 * @param options - Mutable options bag shared with the caller.
 * @param agents - Mutable agent list; SDK-provided agents are appended.
 * @param getAppState - Accessor for current app state (fast-mode reporting).
 */
async function handleInitializeRequest(
  request: SDKControlInitializeRequest,
  requestId: string,
  initialized: boolean,
  output: Stream<StdoutMessage>,
  commands: Command[],
  modelInfos: ModelInfo[],
  structuredIO: StructuredIO,
  enableAuthStatus: boolean,
  options: {
    systemPrompt: string | undefined
    appendSystemPrompt: string | undefined
    agent?: string | undefined
    userSpecifiedModel?: string | undefined
    [key: string]: unknown
  },
  agents: AgentDefinition[],
  getAppState: () => AppState,
): Promise<void> {
  // Reject double-initialization, but still surface any pending permission
  // requests so the consumer can reconcile state.
  if (initialized) {
    output.enqueue({
      type: 'control_response',
      response: {
        subtype: 'error',
        error: 'Already initialized',
        request_id: requestId,
        pending_permission_requests:
          structuredIO.getPendingPermissionRequests(),
      },
    })
    return
  }

  // Apply systemPrompt/appendSystemPrompt from stdin to avoid ARG_MAX limits
  if (request.systemPrompt !== undefined) {
    options.systemPrompt = request.systemPrompt
  }
  if (request.appendSystemPrompt !== undefined) {
    options.appendSystemPrompt = request.appendSystemPrompt
  }
  if (request.promptSuggestions !== undefined) {
    options.promptSuggestions = request.promptSuggestions
  }

  // Merge agents from stdin to avoid ARG_MAX limits
  if (request.agents) {
    const stdinAgents = parseAgentsFromJson(request.agents, 'flagSettings')
    agents.push(...stdinAgents)
  }

  // Re-evaluate main thread agent after SDK agents are merged
  // This allows --agent to reference agents defined via SDK
  if (options.agent) {
    // If main.tsx already found this agent (filesystem-defined), it already
    // applied systemPrompt/model/initialPrompt. Skip to avoid double-apply.
    const alreadyResolved = getMainThreadAgentType() === options.agent
    const mainThreadAgent = agents.find(a => a.agentType === options.agent)
    if (mainThreadAgent && !alreadyResolved) {
      // Update the main thread agent type in bootstrap state
      setMainThreadAgentType(mainThreadAgent.agentType)

      // Apply the agent's system prompt if user hasn't specified a custom one
      // SDK agents are always custom agents (not built-in), so getSystemPrompt() takes no args
      if (!options.systemPrompt && !isBuiltInAgent(mainThreadAgent)) {
        const agentSystemPrompt = mainThreadAgent.getSystemPrompt()
        if (agentSystemPrompt) {
          options.systemPrompt = agentSystemPrompt
        }
      }

      // Apply the agent's model if user didn't specify one and agent has a model
      // ('inherit' means "use whatever the parent context uses" and is skipped)
      if (
        !options.userSpecifiedModel &&
        mainThreadAgent.model &&
        mainThreadAgent.model !== 'inherit'
      ) {
        const agentModel = parseUserSpecifiedModel(mainThreadAgent.model)
        setMainLoopModelOverride(agentModel)
      }

      // SDK-defined agents arrive via init, so main.tsx's lookup missed them.
      if (mainThreadAgent.initialPrompt) {
        structuredIO.prependUserMessage(mainThreadAgent.initialPrompt)
      }
    } else if (mainThreadAgent?.initialPrompt) {
      // Filesystem-defined agent (alreadyResolved by main.tsx). main.tsx
      // handles initialPrompt for the string inputPrompt case, but when
      // inputPrompt is an AsyncIterable (SDK stream-json), it can't
      // concatenate — fall back to prependUserMessage here.
      structuredIO.prependUserMessage(mainThreadAgent.initialPrompt)
    }
  }

  const settings = getSettings_DEPRECATED()
  const outputStyle = settings?.outputStyle || DEFAULT_OUTPUT_STYLE_NAME
  const availableOutputStyles = await getAllOutputStyles(getCwd())

  // Get account information
  const accountInfo = getAccountInformation()
  // Register hook callbacks keyed by event; each matcher's callback ids are
  // bridged back over structured stdio with the matcher's timeout.
  if (request.hooks) {
    const hooks: Partial<Record<HookEvent, HookCallbackMatcher[]>> = {}
    for (const [event, matchers] of Object.entries(request.hooks)) {
      hooks[event as HookEvent] = matchers.map(matcher => {
        const callbacks = matcher.hookCallbackIds.map(callbackId => {
          return structuredIO.createHookCallback(callbackId, matcher.timeout)
        })
        return {
          matcher: matcher.matcher,
          hooks: callbacks,
        }
      })
    }
    registerHookCallbacks(hooks)
  }
  if (request.jsonSchema) {
    setInitJsonSchema(request.jsonSchema)
  }
  const initResponse: SDKControlInitializeResponse = {
    commands: commands
      .filter(cmd => cmd.userInvocable !== false)
      .map(cmd => ({
        name: getCommandName(cmd),
        description: formatDescriptionWithSource(cmd),
        argumentHint: cmd.argumentHint || '',
      })),
    agents: agents.map(agent => ({
      name: agent.agentType,
      description: agent.whenToUse,
      // 'inherit' is an internal sentinel; normalize to undefined for the public API
      model: agent.model === 'inherit' ? undefined : agent.model,
    })),
    output_style: outputStyle,
    available_output_styles: Object.keys(availableOutputStyles),
    models: modelInfos,
    account: {
      email: accountInfo?.email,
      organization: accountInfo?.organization,
      subscriptionType: accountInfo?.subscription,
      tokenSource: accountInfo?.tokenSource,
      apiKeySource: accountInfo?.apiKeySource,
      // getAccountInformation() returns undefined under 3P providers, so the
      // other fields are all absent. apiProvider disambiguates "not logged
      // in" (firstParty + tokenSource:none) from "3P, login not applicable".
      apiProvider: getAPIProvider(),
    },
    pid: process.pid,
  }

  if (isFastModeEnabled() && isFastModeAvailable()) {
    const appState = getAppState()
    initResponse.fast_mode_state = getFastModeState(
      options.userSpecifiedModel ?? null,
      appState.fastMode,
    )
  }

  output.enqueue({
    type: 'control_response',
    response: {
      subtype: 'success',
      request_id: requestId,
      response: initResponse,
    },
  })

  // After the initialize message, check the auth status.
  // The listener will get notified of changes, but we also want to send the
  // initial state.
  if (enableAuthStatus) {
    const authStatusManager = AwsAuthStatusManager.getInstance()
    const status = authStatusManager.getStatus()
    if (status) {
      output.enqueue({
        type: 'auth_status',
        isAuthenticating: status.isAuthenticating,
        output: status.output,
        error: status.error,
        uuid: randomUUID(),
        session_id: getSessionId(),
      })
    }
  }
}
+
+async function handleRewindFiles(
+  userMessageId: UUID,
+  appState: AppState,
+  setAppState: (updater: (prev: AppState) => AppState) => void,
+  dryRun: boolean,
+): Promise<RewindFilesResult> {
+  if (!fileHistoryEnabled()) {
+    return { canRewind: false, error: 'File rewinding is not enabled.' }
+  }
+  if (!fileHistoryCanRestore(appState.fileHistory, userMessageId)) {
+    return {
+      canRewind: false,
+      error: 'No file checkpoint found for this message.',
+    }
+  }
+
+  if (dryRun) {
+    const diffStats = await fileHistoryGetDiffStats(
+      appState.fileHistory,
+      userMessageId,
+    )
+    return {
+      canRewind: true,
+      filesChanged: diffStats?.filesChanged,
+      insertions: diffStats?.insertions,
+      deletions: diffStats?.deletions,
+    }
+  }
+
+  try {
+    await fileHistoryRewind(
+      updater =>
+        setAppState(prev => ({
+          ...prev,
+          fileHistory: updater(prev.fileHistory),
+        })),
+      userMessageId,
+    )
+  } catch (error) {
+    return {
+      canRewind: false,
+      error: `Failed to rewind: ${errorMessage(error)}`,
+    }
+  }
+
+  return { canRewind: true }
+}
+
+function handleSetPermissionMode(
+  request: { mode: InternalPermissionMode },
+  requestId: string,
+  toolPermissionContext: ToolPermissionContext,
+  output: Stream<StdoutMessage>,
+): ToolPermissionContext {
+  // Check if trying to switch to bypassPermissions mode
+  if (request.mode === 'bypassPermissions') {
+    if (isBypassPermissionsModeDisabled()) {
+      output.enqueue({
+        type: 'control_response',
+        response: {
+          subtype: 'error',
+          request_id: requestId,
+          error:
+            'Cannot set permission mode to bypassPermissions because it is disabled by settings or configuration',
+        },
+      })
+      return toolPermissionContext
+    }
+    if (!toolPermissionContext.isBypassPermissionsModeAvailable) {
+      output.enqueue({
+        type: 'control_response',
+        response: {
+          subtype: 'error',
+          request_id: requestId,
+          error:
+            'Cannot set permission mode to bypassPermissions because the session was not launched with --dangerously-skip-permissions',
+        },
+      })
+      return toolPermissionContext
+    }
+  }
+
+  // Check if trying to switch to auto mode without the classifier gate
+  if (
+    feature('TRANSCRIPT_CLASSIFIER') &&
+    request.mode === 'auto' &&
+    !isAutoModeGateEnabled()
+  ) {
+    const reason = getAutoModeUnavailableReason()
+    output.enqueue({
+      type: 'control_response',
+      response: {
+        subtype: 'error',
+        request_id: requestId,
+        error: reason
+          ? `Cannot set permission mode to auto: ${getAutoModeUnavailableNotification(reason)}`
+          : 'Cannot set permission mode to auto',
+      },
+    })
+    return toolPermissionContext
+  }
+
+  // Allow the mode switch
+  output.enqueue({
+    type: 'control_response',
+    response: {
+      subtype: 'success',
+      request_id: requestId,
+      response: {
+        mode: request.mode,
+      },
+    },
+  })
+
+  return {
+    ...transitionPermissionMode(
+      toolPermissionContext.mode,
+      request.mode,
+      toolPermissionContext,
+    ),
+    mode: request.mode,
+  }
+}
+
+/**
+ * IDE-triggered channel enable. Derives the ChannelEntry from the connection's
+ * pluginSource (IDE can't spoof kind/marketplace — we only take the server
+ * name), appends it to session allowedChannels, and runs the full gate. On
+ * gate failure, rolls back the append. On success, registers a notification
+ * handler that enqueues channel messages at priority:'next' — drainCommandQueue
+ * picks them up between turns.
+ *
+ * Intentionally does NOT register the claude/channel/permission handler that
+ * useManageMCPConnections sets up for interactive mode. That handler resolves
+ * a pending dialog inside handleInteractivePermission — but print.ts never
+ * calls handleInteractivePermission. When SDK permission lands on 'ask', it
+ * goes to the consumer's canUseTool callback over stdio; there is no CLI-side
+ * dialog for a remote "yes tbxkq" to resolve. If an IDE wants channel-relayed
+ * tool approval, that's IDE-side plumbing against its own pending-map. (Also
+ * gated separately by tengu_harbor_permissions — not yet shipping on
+ * interactive either.)
+ */
+function handleChannelEnable(
+  requestId: string,
+  serverName: string,
+  connectionPool: readonly MCPServerConnection[],
+  output: Stream<StdoutMessage>,
+): void {
+  const respondError = (error: string) =>
+    output.enqueue({
+      type: 'control_response',
+      response: { subtype: 'error', request_id: requestId, error },
+    })
+
+  if (!(feature('KAIROS') || feature('KAIROS_CHANNELS'))) {
+    return respondError('channels feature not available in this build')
+  }
+
+  // Only a 'connected' client has .capabilities and .client to register the
+  // handler on. The pool spread at the call site matches mcp_status.
+  const connection = connectionPool.find(
+    c => c.name === serverName && c.type === 'connected',
+  )
+  if (!connection || connection.type !== 'connected') {
+    return respondError(`server ${serverName} is not connected`)
+  }
+
+  const pluginSource = connection.config.pluginSource
+  const parsed = pluginSource ? parsePluginIdentifier(pluginSource) : undefined
+  if (!parsed?.marketplace) {
+    // No pluginSource or @-less source — can never pass the {plugin,
+    // marketplace}-keyed allowlist. Short-circuit with the same reason the
+    // gate would produce.
+    return respondError(
+      `server ${serverName} is not plugin-sourced; channel_enable requires a marketplace plugin`,
+    )
+  }
+
+  const entry: ChannelEntry = {
+    kind: 'plugin',
+    name: parsed.name,
+    marketplace: parsed.marketplace,
+  }
+  // Idempotency: don't double-append on repeat enable.
+  const prior = getAllowedChannels()
+  const already = prior.some(
+    e =>
+      e.kind === 'plugin' &&
+      e.name === entry.name &&
+      e.marketplace === entry.marketplace,
+  )
+  if (!already) setAllowedChannels([...prior, entry])
+
+  const gate = gateChannelServer(
+    serverName,
+    connection.capabilities,
+    pluginSource,
+  )
+  if (gate.action === 'skip') {
+    // Rollback — only remove the entry we appended.
+    if (!already) setAllowedChannels(prior)
+    return respondError(gate.reason)
+  }
+
+  const pluginId =
+    `${entry.name}@${entry.marketplace}` as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS
+  logMCPDebug(serverName, 'Channel notifications registered')
+  logEvent('tengu_mcp_channel_enable', { plugin: pluginId })
+
+  // Identical enqueue shape to the interactive register block in
+  // useManageMCPConnections. drainCommandQueue processes it between turns —
+  // channel messages queue at priority 'next' and are seen by the model on
+  // the turn after they arrive.
+  connection.client.setNotificationHandler(
+    ChannelMessageNotificationSchema(),
+    async notification => {
+      const { content, meta } = notification.params
+      logMCPDebug(
+        serverName,
+        `notifications/claude/channel: ${content.slice(0, 80)}`,
+      )
+      logEvent('tengu_mcp_channel_message', {
+        content_length: content.length,
+        meta_key_count: Object.keys(meta ?? {}).length,
+        entry_kind:
+          'plugin' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        is_dev: false,
+        plugin: pluginId,
+      })
+      enqueue({
+        mode: 'prompt',
+        value: wrapChannelMessage(serverName, content, meta),
+        priority: 'next',
+        isMeta: true,
+        origin: { kind: 'channel', server: serverName },
+        skipSlashCommands: true,
+      })
+    },
+  )
+
+  output.enqueue({
+    type: 'control_response',
+    response: {
+      subtype: 'success',
+      request_id: requestId,
+      response: undefined,
+    },
+  })
+}
+
+/**
+ * Re-register the channel notification handler after mcp_reconnect /
+ * mcp_toggle creates a new client. handleChannelEnable bound the handler to
+ * the OLD client object; allowedChannels survives the reconnect but the
+ * handler binding does not. Without this, channel messages silently drop
+ * after a reconnect while the IDE still believes the channel is live.
+ *
+ * Mirrors the interactive CLI's onConnectionAttempt in
+ * useManageMCPConnections, which re-gates on every new connection. Paired
+ * with registerElicitationHandlers at the same call sites.
+ *
+ * No-op if the server was never channel-enabled: gateChannelServer calls
+ * findChannelEntry internally and returns skip/session for an unlisted
+ * server, so reconnecting a non-channel MCP server costs one feature-flag
+ * check.
+ */
+function reregisterChannelHandlerAfterReconnect(
+  connection: MCPServerConnection,
+): void {
+  // Channel support is feature-flagged; skip entirely when neither flag is on.
+  if (!(feature('KAIROS') || feature('KAIROS_CHANNELS'))) return
+  // Only a live connection exposes a client we can attach a handler to.
+  if (connection.type !== 'connected') return
+
+  // Re-run the same gating used at initial registration; anything other than
+  // 'register' means this server is not channel-enabled (see jsdoc above).
+  const gate = gateChannelServer(
+    connection.name,
+    connection.capabilities,
+    connection.config.pluginSource,
+  )
+  if (gate.action !== 'register') return
+
+  // Derive analytics metadata from the channel entry (plugin id only exists
+  // for plugin-kind entries).
+  const entry = findChannelEntry(connection.name, getAllowedChannels())
+  const pluginId =
+    entry?.kind === 'plugin'
+      ? (`${entry.name}@${entry.marketplace}` as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
+      : undefined
+
+  logMCPDebug(
+    connection.name,
+    'Channel notifications re-registered after reconnect',
+  )
+  // Rebind the handler on the NEW client object created by the reconnect.
+  connection.client.setNotificationHandler(
+    ChannelMessageNotificationSchema(),
+    async notification => {
+      const { content, meta } = notification.params
+      // Only log a short prefix of the content for debugging.
+      logMCPDebug(
+        connection.name,
+        `notifications/claude/channel: ${content.slice(0, 80)}`,
+      )
+      logEvent('tengu_mcp_channel_message', {
+        content_length: content.length,
+        meta_key_count: Object.keys(meta ?? {}).length,
+        entry_kind:
+          entry?.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+        is_dev: entry?.dev ?? false,
+        plugin: pluginId,
+      })
+      // Surface the channel message as a high-priority meta prompt; slash
+      // command parsing is skipped so content is treated as plain input.
+      enqueue({
+        mode: 'prompt',
+        value: wrapChannelMessage(connection.name, content, meta),
+        priority: 'next',
+        isMeta: true,
+        origin: { kind: 'channel', server: connection.name },
+        skipSlashCommands: true,
+      })
+    },
+  )
+}
+
+/**
+ * Reports a fatal load error in the caller's requested output format.
+ * stream-json consumers get a structured error result on stdout so SDK
+ * parsers can handle it uniformly; every other format gets the raw
+ * message on stderr as plain text.
+ */
+function emitLoadError(
+  message: string,
+  outputFormat: string | undefined,
+): void {
+  if (outputFormat !== 'stream-json') {
+    process.stderr.write(message + '\n')
+    return
+  }
+
+  // Shape mirrors the normal result message; all counters are zeroed since
+  // the error occurred before any work was done.
+  const payload = {
+    type: 'result',
+    subtype: 'error_during_execution',
+    duration_ms: 0,
+    duration_api_ms: 0,
+    is_error: true,
+    num_turns: 0,
+    stop_reason: null,
+    session_id: getSessionId(),
+    total_cost_usd: 0,
+    usage: EMPTY_USAGE,
+    modelUsage: {},
+    permission_denials: [],
+    uuid: randomUUID(),
+    errors: [message],
+  }
+  process.stdout.write(jsonStringify(payload) + '\n')
+}
+
+/**
+ * Removes an interrupted user message and its synthetic assistant sentinel
+ * from the message array. Used during gateway-triggered restarts to clean up
+ * the message history before re-enqueuing the interrupted prompt.
+ *
+ * @internal Exported for testing
+ */
+export function removeInterruptedMessage(
+  messages: Message[],
+  interruptedUserMessage: NormalizedUserMessage,
+): void {
+  const idx = messages.findIndex(m => m.uuid === interruptedUserMessage.uuid)
+  if (idx !== -1) {
+    // Remove the user message and the sentinel that immediately follows it.
+    // splice safely handles the case where idx is the last element.
+    messages.splice(idx, 2)
+  }
+}
+
+/**
+ * Shape returned by loadInitialMessages: the starting transcript plus any
+ * per-session state recovered from a resumed conversation log.
+ */
+type LoadInitialMessagesResult = {
+  messages: Message[]
+  // Interrupted-turn state carried through from the resumed log, when recorded.
+  turnInterruptionState?: TurnInterruptionState
+  // Agent setting persisted with the resumed session, when present.
+  agentSetting?: string
+}
+
+/**
+ * Resolves the initial transcript for print mode, honoring --continue,
+ * --teleport, and --resume (checked in that order). When none applies — or
+ * --continue finds no prior session — it falls through to the SessionStart
+ * hooks at the bottom. Unrecoverable load failures are reported via
+ * emitLoadError/logError, trigger gracefulShutdownSync(1), and yield an
+ * empty transcript.
+ */
+async function loadInitialMessages(
+  setAppState: (f: (prev: AppState) => AppState) => void,
+  options: {
+    continue: boolean | undefined
+    teleport: string | true | null | undefined
+    resume: string | boolean | undefined
+    resumeSessionAt: string | undefined
+    forkSession: boolean | undefined
+    outputFormat: string | undefined
+    sessionStartHooksPromise?: ReturnType<typeof processSessionStartHooks>
+    restoredWorkerState: Promise<SessionExternalMetadata | null>
+  },
+): Promise<LoadInitialMessagesResult> {
+  const persistSession = !isSessionPersistenceDisabled()
+  // Handle continue in print mode
+  if (options.continue) {
+    try {
+      logEvent('tengu_continue_print', {})
+
+      const result = await loadConversationForResume(
+        undefined /* sessionId */,
+        undefined /* file path */,
+      )
+      if (result) {
+        // Match coordinator mode to the resumed session's mode
+        if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+          const warning = coordinatorModeModule.matchSessionMode(result.mode)
+          if (warning) {
+            process.stderr.write(warning + '\n')
+            // Refresh agent definitions to reflect the mode switch
+            const {
+              getAgentDefinitionsWithOverrides,
+              getActiveAgentsFromList,
+            } =
+              // eslint-disable-next-line @typescript-eslint/no-require-imports
+              require('../tools/AgentTool/loadAgentsDir.js') as typeof import('../tools/AgentTool/loadAgentsDir.js')
+            getAgentDefinitionsWithOverrides.cache.clear?.()
+            const freshAgentDefs = await getAgentDefinitionsWithOverrides(
+              getCwd(),
+            )
+
+            setAppState(prev => ({
+              ...prev,
+              agentDefinitions: {
+                ...freshAgentDefs,
+                allAgents: freshAgentDefs.allAgents,
+                activeAgents: getActiveAgentsFromList(freshAgentDefs.allAgents),
+              },
+            }))
+          }
+        }
+
+        // Reuse the resumed session's ID
+        if (!options.forkSession) {
+          if (result.sessionId) {
+            switchSession(
+              asSessionId(result.sessionId),
+              result.fullPath ? dirname(result.fullPath) : null,
+            )
+            if (persistSession) {
+              await resetSessionFilePointer()
+            }
+          }
+        }
+        restoreSessionStateFromLog(result, setAppState)
+
+        // Restore session metadata so it's re-appended on exit via reAppendSessionMetadata
+        restoreSessionMetadata(
+          options.forkSession
+            ? { ...result, worktreeSession: undefined }
+            : result,
+        )
+
+        // Write mode entry for the resumed session
+        if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+          saveMode(
+            coordinatorModeModule.isCoordinatorMode()
+              ? 'coordinator'
+              : 'normal',
+          )
+        }
+
+        return {
+          messages: result.messages,
+          turnInterruptionState: result.turnInterruptionState,
+          agentSetting: result.agentSetting,
+        }
+      }
+      // No prior session found: fall through to the SessionStart hooks below.
+    } catch (error) {
+      logError(error)
+      gracefulShutdownSync(1)
+      return { messages: [] }
+    }
+  }
+
+  // Handle teleport in print mode
+  if (options.teleport) {
+    try {
+      if (!isPolicyAllowed('allow_remote_sessions')) {
+        throw new Error(
+          "Remote sessions are disabled by your organization's policy.",
+        )
+      }
+
+      logEvent('tengu_teleport_print', {})
+
+      if (typeof options.teleport !== 'string') {
+        throw new Error('No session ID provided for teleport')
+      }
+
+      const {
+        checkOutTeleportedSessionBranch,
+        processMessagesForTeleportResume,
+        teleportResumeCodeSession,
+        validateGitState,
+      } = await import('src/utils/teleport.js')
+      await validateGitState()
+      const teleportResult = await teleportResumeCodeSession(options.teleport)
+      const { branchError } = await checkOutTeleportedSessionBranch(
+        teleportResult.branch,
+      )
+      return {
+        messages: processMessagesForTeleportResume(
+          teleportResult.log,
+          branchError,
+        ),
+      }
+    } catch (error) {
+      logError(error)
+      gracefulShutdownSync(1)
+      return { messages: [] }
+    }
+  }
+
+  // Handle resume in print mode (accepts session ID or URL)
+  // URLs are [ANT-ONLY]
+  if (options.resume) {
+    try {
+      logEvent('tengu_resume_print', {})
+
+      // In print mode - we require a valid session ID, JSONL file or URL
+      const parsedSessionId = parseSessionIdentifier(
+        typeof options.resume === 'string' ? options.resume : '',
+      )
+      if (!parsedSessionId) {
+        let errorMessage =
+          'Error: --resume requires a valid session ID when used with --print. Usage: claude -p --resume <session-id>'
+        if (typeof options.resume === 'string') {
+          errorMessage += `. Session IDs must be in UUID format (e.g., 550e8400-e29b-41d4-a716-446655440000). Provided value "${options.resume}" is not a valid UUID`
+        }
+        emitLoadError(errorMessage, options.outputFormat)
+        gracefulShutdownSync(1)
+        return { messages: [] }
+      }
+
+      // Hydrate local transcript from remote before loading
+      if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
+        // Await restore alongside hydration so SSE catchup lands on
+        // restored state, not a fresh default.
+        const [, metadata] = await Promise.all([
+          hydrateFromCCRv2InternalEvents(parsedSessionId.sessionId),
+          options.restoredWorkerState,
+        ])
+        if (metadata) {
+          setAppState(externalMetadataToAppState(metadata))
+          if (typeof metadata.model === 'string') {
+            setMainLoopModelOverride(metadata.model)
+          }
+        }
+      } else if (
+        parsedSessionId.isUrl &&
+        parsedSessionId.ingressUrl &&
+        isEnvTruthy(process.env.ENABLE_SESSION_PERSISTENCE)
+      ) {
+        // v1: fetch session logs from Session Ingress
+        await hydrateRemoteSession(
+          parsedSessionId.sessionId,
+          parsedSessionId.ingressUrl,
+        )
+      }
+
+      // Load the conversation with the specified session ID
+      const result = await loadConversationForResume(
+        parsedSessionId.sessionId,
+        parsedSessionId.jsonlFile || undefined,
+      )
+
+      // hydrateFromCCRv2InternalEvents writes an empty transcript file for
+      // fresh sessions (writeFile(sessionFile, '') with zero events), so
+      // loadConversationForResume returns {messages: []} not null. Treat
+      // empty the same as null so SessionStart still fires.
+      if (!result || result.messages.length === 0) {
+        // For URL-based or CCR v2 resume, start with empty session (it was hydrated but empty)
+        if (
+          parsedSessionId.isUrl ||
+          isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)
+        ) {
+          // Execute SessionStart hooks for startup since we're starting a new session
+          return {
+            messages: await (options.sessionStartHooksPromise ??
+              processSessionStartHooks('startup')),
+          }
+        } else {
+          emitLoadError(
+            `No conversation found with session ID: ${parsedSessionId.sessionId}`,
+            options.outputFormat,
+          )
+          gracefulShutdownSync(1)
+          return { messages: [] }
+        }
+      }
+
+      // Handle resumeSessionAt feature
+      if (options.resumeSessionAt) {
+        const index = result.messages.findIndex(
+          m => m.uuid === options.resumeSessionAt,
+        )
+        if (index < 0) {
+          emitLoadError(
+            `No message found with message.uuid of: ${options.resumeSessionAt}`,
+            options.outputFormat,
+          )
+          gracefulShutdownSync(1)
+          return { messages: [] }
+        }
+
+        // index is guaranteed >= 0 here (the < 0 case returned above), so
+        // truncate the transcript to end at the requested message inclusive.
+        result.messages = result.messages.slice(0, index + 1)
+      }
+
+      // Match coordinator mode to the resumed session's mode
+      if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+        const warning = coordinatorModeModule.matchSessionMode(result.mode)
+        if (warning) {
+          process.stderr.write(warning + '\n')
+          // Refresh agent definitions to reflect the mode switch
+          const { getAgentDefinitionsWithOverrides, getActiveAgentsFromList } =
+            // eslint-disable-next-line @typescript-eslint/no-require-imports
+            require('../tools/AgentTool/loadAgentsDir.js') as typeof import('../tools/AgentTool/loadAgentsDir.js')
+          getAgentDefinitionsWithOverrides.cache.clear?.()
+          const freshAgentDefs = await getAgentDefinitionsWithOverrides(
+            getCwd(),
+          )
+
+          setAppState(prev => ({
+            ...prev,
+            agentDefinitions: {
+              ...freshAgentDefs,
+              allAgents: freshAgentDefs.allAgents,
+              activeAgents: getActiveAgentsFromList(freshAgentDefs.allAgents),
+            },
+          }))
+        }
+      }
+
+      // Reuse the resumed session's ID
+      if (!options.forkSession && result.sessionId) {
+        switchSession(
+          asSessionId(result.sessionId),
+          result.fullPath ? dirname(result.fullPath) : null,
+        )
+        if (persistSession) {
+          await resetSessionFilePointer()
+        }
+      }
+      restoreSessionStateFromLog(result, setAppState)
+
+      // Restore session metadata so it's re-appended on exit via reAppendSessionMetadata
+      restoreSessionMetadata(
+        options.forkSession
+          ? { ...result, worktreeSession: undefined }
+          : result,
+      )
+
+      // Write mode entry for the resumed session
+      if (feature('COORDINATOR_MODE') && coordinatorModeModule) {
+        saveMode(
+          coordinatorModeModule.isCoordinatorMode() ? 'coordinator' : 'normal',
+        )
+      }
+
+      return {
+        messages: result.messages,
+        turnInterruptionState: result.turnInterruptionState,
+        agentSetting: result.agentSetting,
+      }
+    } catch (error) {
+      logError(error)
+      const errorMessage =
+        error instanceof Error
+          ? `Failed to resume session: ${error.message}`
+          : 'Failed to resume session with --print mode'
+      emitLoadError(errorMessage, options.outputFormat)
+      gracefulShutdownSync(1)
+      return { messages: [] }
+    }
+  }
+
+  // Join the SessionStart hooks promise kicked in main.tsx (or run fresh if
+  // it wasn't kicked — e.g. --continue with no prior session falls through
+  // here with sessionStartHooksPromise undefined because main.tsx guards on continue)
+  return {
+    messages: await (options.sessionStartHooksPromise ??
+      processSessionStartHooks('startup')),
+  }
+}
+
+/**
+ * Builds the structured I/O layer for SDK mode. A plain-string prompt is
+ * normalized into a single-message stream (blank input becomes an empty
+ * stream); an async iterable is passed through untouched. When sdkUrl is
+ * supplied, RemoteIO wraps the stream instead of plain StructuredIO.
+ */
+function getStructuredIO(
+  inputPrompt: string | AsyncIterable<string>,
+  options: {
+    sdkUrl: string | undefined
+    replayUserMessages?: boolean
+  },
+): StructuredIO {
+  // Normalize a string prompt into a streaming input.
+  const toStream = (prompt: string): AsyncIterable<string> => {
+    if (prompt.trim() === '') return fromArray([])
+    const serialized = jsonStringify({
+      type: 'user',
+      session_id: '',
+      message: {
+        role: 'user',
+        content: prompt,
+      },
+      parent_tool_use_id: null,
+    } satisfies SDKUserMessage)
+    return fromArray([serialized])
+  }
+
+  const inputStream =
+    typeof inputPrompt === 'string' ? toStream(inputPrompt) : inputPrompt
+
+  // Use RemoteIO if sdkUrl is provided, otherwise use regular StructuredIO.
+  if (options.sdkUrl) {
+    return new RemoteIO(options.sdkUrl, inputStream, options.replayUserMessages)
+  }
+  return new StructuredIO(inputStream, options.replayUserMessages)
+}
+
+/**
+ * Handles unexpected permission responses by looking up the unresolved tool
+ * call in the transcript and enqueuing it for execution.
+ *
+ * Returns true if a permission was enqueued, false otherwise.
+ */
+export async function handleOrphanedPermissionResponse({
+  message,
+  setAppState,
+  onEnqueued,
+  handledToolUseIds,
+}: {
+  message: SDKControlResponse
+  setAppState: (f: (prev: AppState) => AppState) => void
+  onEnqueued?: () => void
+  handledToolUseIds: Set<string>
+}): Promise<boolean> {
+  if (
+    message.response.subtype === 'success' &&
+    message.response.response?.toolUseID &&
+    typeof message.response.response.toolUseID === 'string'
+  ) {
+    const permissionResult = message.response.response as PermissionResult
+    const { toolUseID } = permissionResult
+    if (!toolUseID) {
+      return false
+    }
+
+    logForDebugging(
+      `handleOrphanedPermissionResponse: received orphaned control_response for toolUseID=${toolUseID} request_id=${message.response.request_id}`,
+    )
+
+    // Prevent re-processing the same orphaned tool_use. Without this guard,
+    // duplicate control_response deliveries (e.g. from WebSocket reconnect)
+    // cause the same tool to be executed multiple times, producing duplicate
+    // tool_use IDs in the messages array and a 400 error from the API.
+    // Once corrupted, every retry accumulates more duplicates.
+    if (handledToolUseIds.has(toolUseID)) {
+      logForDebugging(
+        `handleOrphanedPermissionResponse: skipping duplicate orphaned permission for toolUseID=${toolUseID} (already handled)`,
+      )
+      return false
+    }
+
+    const assistantMessage = await findUnresolvedToolUse(toolUseID)
+    if (!assistantMessage) {
+      logForDebugging(
+        `handleOrphanedPermissionResponse: no unresolved tool_use found for toolUseID=${toolUseID} (already resolved in transcript)`,
+      )
+      return false
+    }
+
+    handledToolUseIds.add(toolUseID)
+    logForDebugging(
+      `handleOrphanedPermissionResponse: enqueuing orphaned permission for toolUseID=${toolUseID} messageID=${assistantMessage.message.id}`,
+    )
+    enqueue({
+      mode: 'orphaned-permission' as const,
+      value: [],
+      orphanedPermission: {
+        permissionResult,
+        assistantMessage,
+      },
+    })
+
+    onEnqueued?.()
+    return true
+  }
+  return false
+}
+
+/**
+ * Tracked state for dynamically added (process-based) MCP servers: the live
+ * connection records, the tools fetched from them, and their scoped configs.
+ */
+export type DynamicMcpState = {
+  clients: MCPServerConnection[]
+  tools: Tools
+  configs: Record<string, ScopedMcpServerConfig>
+}
+
+/**
+ * Promotes a process-transport MCP config to a scoped config by tagging it
+ * with the 'dynamic' scope. The two shapes are structurally compatible
+ * (process-transport configs exclude IDE-only types like sse-ide and
+ * ws-ide), so the cast is safe.
+ */
+function toScopedConfig(
+  config: McpServerConfigForProcessTransport,
+): ScopedMcpServerConfig {
+  const scoped = { ...config, scope: 'dynamic' }
+  return scoped as ScopedMcpServerConfig
+}
+
+/**
+ * State for SDK MCP servers that run in the SDK process.
+ */
+export type SdkMcpState = {
+  // Desired SDK server configs, keyed by server name.
+  configs: Record<string, McpSdkServerConfig>
+  // Connection records; new servers start as 'pending' until upgraded.
+  clients: MCPServerConnection[]
+  // Tools sourced from these servers (names carry the mcp__<server>__ prefix).
+  tools: Tools
+}
+
+/**
+ * Result of handleMcpSetServers - contains new state and response data.
+ */
+export type McpSetServersResult = {
+  // Combined added/removed/errors across SDK and process-based servers.
+  response: SDKControlMcpSetServersResponse
+  newSdkState: SdkMcpState
+  newDynamicState: DynamicMcpState
+  // True when any SDK server was added or removed in this request.
+  sdkServersChanged: boolean
+}
+
+/**
+ * Handles mcp_set_servers requests by processing both SDK and process-based servers.
+ * SDK servers run in the SDK process; process-based servers are spawned by the CLI.
+ *
+ * Applies enterprise allowedMcpServers/deniedMcpServers policy — same filter as
+ * --mcp-config (see filterMcpServersByPolicy call in main.tsx). Without this,
+ * SDK V2 Query.setMcpServers() was a second policy bypass vector. Blocked servers
+ * are reported in response.errors so the SDK consumer knows why they weren't added.
+ */
+export async function handleMcpSetServers(
+  servers: Record<string, McpServerConfigForProcessTransport>,
+  sdkState: SdkMcpState,
+  dynamicState: DynamicMcpState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+): Promise<McpSetServersResult> {
+  // Enforce enterprise MCP policy on process-based servers (stdio/http/sse).
+  // Mirrors the --mcp-config filter in main.tsx — both user-controlled injection
+  // paths must have the same gate. type:'sdk' servers are exempt (SDK-managed,
+  // CLI never spawns/connects for them — see filterMcpServersByPolicy jsdoc).
+  // Blocked servers go into response.errors so the SDK caller sees why.
+  const { allowed: allowedServers, blocked } = filterMcpServersByPolicy(servers)
+  const policyErrors: Record<string, string> = {}
+  for (const name of blocked) {
+    policyErrors[name] =
+      'Blocked by enterprise policy (allowedMcpServers/deniedMcpServers)'
+  }
+
+  // Separate SDK servers from process-based servers
+  const sdkServers: Record<string, McpSdkServerConfig> = {}
+  const processServers: Record<string, McpServerConfigForProcessTransport> = {}
+
+  for (const [name, config] of Object.entries(allowedServers)) {
+    if (config.type === 'sdk') {
+      sdkServers[name] = config
+    } else {
+      processServers[name] = config
+    }
+  }
+
+  // Handle SDK servers
+  const currentSdkNames = new Set(Object.keys(sdkState.configs))
+  const newSdkNames = new Set(Object.keys(sdkServers))
+  const sdkAdded: string[] = []
+  const sdkRemoved: string[] = []
+
+  // Work on copies so the incoming sdkState is never mutated.
+  const newSdkConfigs = { ...sdkState.configs }
+  let newSdkClients = [...sdkState.clients]
+  let newSdkTools = [...sdkState.tools]
+
+  // Remove SDK servers no longer in desired state
+  for (const name of currentSdkNames) {
+    if (!newSdkNames.has(name)) {
+      const client = newSdkClients.find(c => c.name === name)
+      if (client && client.type === 'connected') {
+        // Tear down the connection before dropping it from the list.
+        await client.cleanup()
+      }
+      newSdkClients = newSdkClients.filter(c => c.name !== name)
+      // Tools belonging to a server share the mcp__<name>__ name prefix.
+      const prefix = `mcp__${name}__`
+      newSdkTools = newSdkTools.filter(t => !t.name.startsWith(prefix))
+      delete newSdkConfigs[name]
+      sdkRemoved.push(name)
+    }
+  }
+
+  // Add new SDK servers as pending - they'll be upgraded to connected
+  // when updateSdkMcp() runs on the next query
+  for (const [name, config] of Object.entries(sdkServers)) {
+    if (!currentSdkNames.has(name)) {
+      newSdkConfigs[name] = config
+      const pendingClient: MCPServerConnection = {
+        type: 'pending',
+        name,
+        config: { ...config, scope: 'dynamic' as const },
+      }
+      newSdkClients = [...newSdkClients, pendingClient]
+      sdkAdded.push(name)
+    }
+  }
+
+  // Handle process-based servers
+  const processResult = await reconcileMcpServers(
+    processServers,
+    dynamicState,
+    setAppState,
+  )
+
+  return {
+    response: {
+      // Merge SDK results with process-server results; policy blocks are
+      // surfaced alongside connection errors.
+      added: [...sdkAdded, ...processResult.response.added],
+      removed: [...sdkRemoved, ...processResult.response.removed],
+      errors: { ...policyErrors, ...processResult.response.errors },
+    },
+    newSdkState: {
+      configs: newSdkConfigs,
+      clients: newSdkClients,
+      tools: newSdkTools,
+    },
+    newDynamicState: processResult.newState,
+    sdkServersChanged: sdkAdded.length > 0 || sdkRemoved.length > 0,
+  }
+}
+
+/**
+ * Reconciles the current set of dynamic MCP servers with a new desired state.
+ * Handles additions, removals, and config changes (a changed config is
+ * treated as remove-then-re-add). Returns the response payload plus the
+ * replacement DynamicMcpState, and syncs the new tools/clients into AppState.
+ */
+export async function reconcileMcpServers(
+  desiredConfigs: Record<string, McpServerConfigForProcessTransport>,
+  currentState: DynamicMcpState,
+  setAppState: (f: (prev: AppState) => AppState) => void,
+): Promise<{
+  response: SDKControlMcpSetServersResponse
+  newState: DynamicMcpState
+}> {
+  const currentNames = new Set(Object.keys(currentState.configs))
+  const desiredNames = new Set(Object.keys(desiredConfigs))
+
+  const toRemove = [...currentNames].filter(n => !desiredNames.has(n))
+  const toAdd = [...desiredNames].filter(n => !currentNames.has(n))
+
+  // Check for config changes (same name, different config)
+  const toCheck = [...currentNames].filter(n => desiredNames.has(n))
+  const toReplace = toCheck.filter(name => {
+    const currentConfig = currentState.configs[name]
+    const desiredConfigRaw = desiredConfigs[name]
+    // Missing either side: treat as changed so the server gets replaced.
+    if (!currentConfig || !desiredConfigRaw) return true
+    const desiredConfig = toScopedConfig(desiredConfigRaw)
+    return !areMcpConfigsEqual(currentConfig, desiredConfig)
+  })
+
+  const removed: string[] = []
+  const added: string[] = []
+  const errors: Record<string, string> = {}
+
+  // Work on copies; currentState is left untouched.
+  let newClients = [...currentState.clients]
+  let newTools = [...currentState.tools]
+
+  // Remove old servers (including ones being replaced)
+  for (const name of [...toRemove, ...toReplace]) {
+    const client = newClients.find(c => c.name === name)
+    const config = currentState.configs[name]
+    if (client && config) {
+      if (client.type === 'connected') {
+        try {
+          // Cleanup failure is logged but must not block the reconcile.
+          await client.cleanup()
+        } catch (e) {
+          logError(e)
+        }
+      }
+      // Clear the memoization cache
+      await clearServerCache(name, config)
+    }
+
+    // Remove tools from this server
+    const prefix = `mcp__${name}__`
+    newTools = newTools.filter(t => !t.name.startsWith(prefix))
+
+    // Remove from clients list
+    newClients = newClients.filter(c => c.name !== name)
+
+    // Track removal (only for actually removed, not replaced)
+    if (toRemove.includes(name)) {
+      removed.push(name)
+    }
+  }
+
+  // Add new servers (including replacements)
+  for (const name of [...toAdd, ...toReplace]) {
+    const config = desiredConfigs[name]
+    if (!config) continue
+    const scopedConfig = toScopedConfig(config)
+
+    // SDK servers are managed by the SDK process, not the CLI.
+    // Just track them without trying to connect.
+    if (config.type === 'sdk') {
+      added.push(name)
+      continue
+    }
+
+    try {
+      // Failed clients are kept in the list (with an entry in errors) so
+      // their status remains visible; only a thrown error skips tracking.
+      const client = await connectToServer(name, scopedConfig)
+      newClients.push(client)
+
+      if (client.type === 'connected') {
+        const serverTools = await fetchToolsForClient(client)
+        newTools.push(...serverTools)
+      } else if (client.type === 'failed') {
+        errors[name] = client.error || 'Connection failed'
+      }
+
+      added.push(name)
+    } catch (e) {
+      const err = toError(e)
+      errors[name] = err.message
+      logError(err)
+    }
+  }
+
+  // Build new configs
+  const newConfigs: Record<string, ScopedMcpServerConfig> = {}
+  for (const name of desiredNames) {
+    const config = desiredConfigs[name]
+    if (config) {
+      newConfigs[name] = toScopedConfig(config)
+    }
+  }
+
+  const newState: DynamicMcpState = {
+    clients: newClients,
+    tools: newTools,
+    configs: newConfigs,
+  }
+
+  // Update AppState with the new tools
+  setAppState(prev => {
+    // Get all dynamic server names (current + new)
+    const allDynamicServerNames = new Set([
+      ...Object.keys(currentState.configs),
+      ...Object.keys(newConfigs),
+    ])
+
+    // Remove old dynamic tools
+    const nonDynamicTools = prev.mcp.tools.filter(t => {
+      for (const serverName of allDynamicServerNames) {
+        if (t.name.startsWith(`mcp__${serverName}__`)) {
+          return false
+        }
+      }
+      return true
+    })
+
+    // Remove old dynamic clients
+    const nonDynamicClients = prev.mcp.clients.filter(c => {
+      return !allDynamicServerNames.has(c.name)
+    })
+
+    return {
+      ...prev,
+      mcp: {
+        ...prev.mcp,
+        tools: [...nonDynamicTools, ...newTools],
+        clients: [...nonDynamicClients, ...newClients],
+      },
+    }
+  })
+
+  return {
+    response: { added, removed, errors },
+    newState,
+  }
+}

+ 255 - 0
src/cli/remoteIO.ts

@@ -0,0 +1,255 @@
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { PassThrough } from 'stream'
+import { URL } from 'url'
+import { getSessionId } from '../bootstrap/state.js'
+import { getPollIntervalConfig } from '../bridge/pollConfig.js'
+import { registerCleanup } from '../utils/cleanupRegistry.js'
+import { setCommandLifecycleListener } from '../utils/commandLifecycle.js'
+import { isDebugMode, logForDebugging } from '../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
+import { isEnvTruthy } from '../utils/envUtils.js'
+import { errorMessage } from '../utils/errors.js'
+import { gracefulShutdown } from '../utils/gracefulShutdown.js'
+import { logError } from '../utils/log.js'
+import { writeToStdout } from '../utils/process.js'
+import { getSessionIngressAuthToken } from '../utils/sessionIngressAuth.js'
+import {
+  setSessionMetadataChangedListener,
+  setSessionStateChangedListener,
+} from '../utils/sessionState.js'
+import {
+  setInternalEventReader,
+  setInternalEventWriter,
+} from '../utils/sessionStorage.js'
+import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
+import { StructuredIO } from './structuredIO.js'
+import { CCRClient, CCRInitError } from './transports/ccrClient.js'
+import { SSETransport } from './transports/SSETransport.js'
+import type { Transport } from './transports/Transport.js'
+import { getTransportForUrl } from './transports/transportUtils.js'
+
+/**
+ * Bidirectional streaming for SDK mode with session tracking
+ * Supports WebSocket transport
+ */
+export class RemoteIO extends StructuredIO {
+  private url: URL
+  private transport: Transport
+  private inputStream: PassThrough
+  private readonly isBridge: boolean = false
+  private readonly isDebug: boolean = false
+  private ccrClient: CCRClient | null = null
+  private keepAliveTimer: ReturnType<typeof setInterval> | null = null
+
+  constructor(
+    streamUrl: string,
+    initialPrompt?: AsyncIterable<string>,
+    replayUserMessages?: boolean,
+  ) {
+    const inputStream = new PassThrough({ encoding: 'utf8' })
+    super(inputStream, replayUserMessages)
+    this.inputStream = inputStream
+    this.url = new URL(streamUrl)
+
+    // Prepare headers with session token if available
+    const headers: Record<string, string> = {}
+    const sessionToken = getSessionIngressAuthToken()
+    if (sessionToken) {
+      headers['Authorization'] = `Bearer ${sessionToken}`
+    } else {
+      logForDebugging('[remote-io] No session ingress token available', {
+        level: 'error',
+      })
+    }
+
+    // Add environment runner version if available (set by Environment Manager)
+    const erVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
+    if (erVersion) {
+      headers['x-environment-runner-version'] = erVersion
+    }
+
+    // Provide a callback that re-reads the session token dynamically.
+    // When the parent process refreshes the token (via token file or env var),
+    // the transport can pick it up on reconnection.
+    const refreshHeaders = (): Record<string, string> => {
+      const h: Record<string, string> = {}
+      const freshToken = getSessionIngressAuthToken()
+      if (freshToken) {
+        h['Authorization'] = `Bearer ${freshToken}`
+      }
+      const freshErVersion = process.env.CLAUDE_CODE_ENVIRONMENT_RUNNER_VERSION
+      if (freshErVersion) {
+        h['x-environment-runner-version'] = freshErVersion
+      }
+      return h
+    }
+
+    // Get appropriate transport based on URL protocol
+    this.transport = getTransportForUrl(
+      this.url,
+      headers,
+      getSessionId(),
+      refreshHeaders,
+    )
+
+    // Set up data callback
+    this.isBridge = process.env.CLAUDE_CODE_ENVIRONMENT_KIND === 'bridge'
+    this.isDebug = isDebugMode()
+    this.transport.setOnData((data: string) => {
+      this.inputStream.write(data)
+      if (this.isBridge && this.isDebug) {
+        writeToStdout(data.endsWith('\n') ? data : data + '\n')
+      }
+    })
+
+    // Set up close callback to handle connection failures
+    this.transport.setOnClose(() => {
+      // End the input stream to trigger graceful shutdown
+      this.inputStream.end()
+    })
+
+    // Initialize CCR v2 client (heartbeats, epoch, state reporting, event writes).
+    // The CCRClient constructor wires the SSE received-ack handler
+    // synchronously, so new CCRClient() MUST run before transport.connect() —
+    // otherwise early SSE frames hit an unwired onEventCallback and their
+    // 'received' delivery acks are silently dropped.
+    if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
+      // CCR v2 is SSE+POST by definition. getTransportForUrl returns
+      // SSETransport under the same env var, but the two checks live in
+      // different files — assert the invariant so a future decoupling
+      // fails loudly here instead of confusingly inside CCRClient.
+      if (!(this.transport instanceof SSETransport)) {
+        throw new Error(
+          'CCR v2 requires SSETransport; check getTransportForUrl',
+        )
+      }
+      this.ccrClient = new CCRClient(this.transport, this.url)
+      const init = this.ccrClient.initialize()
+      this.restoredWorkerState = init.catch(() => null)
+      init.catch((error: unknown) => {
+        logForDiagnosticsNoPII('error', 'cli_worker_lifecycle_init_failed', {
+          reason: error instanceof CCRInitError ? error.reason : 'unknown',
+        })
+        logError(
+          new Error(`CCRClient initialization failed: ${errorMessage(error)}`),
+        )
+        void gracefulShutdown(1, 'other')
+      })
+      registerCleanup(async () => this.ccrClient?.close())
+
+      // Register internal event writer for transcript persistence.
+      // When set, sessionStorage writes transcript messages as CCR v2
+      // internal events instead of v1 Session Ingress.
+      setInternalEventWriter((eventType, payload, options) =>
+        this.ccrClient!.writeInternalEvent(eventType, payload, options),
+      )
+
+      // Register internal event readers for session resume.
+      // When set, hydrateFromCCRv2InternalEvents() can fetch foreground
+      // and subagent internal events to reconstruct conversation state.
+      setInternalEventReader(
+        () => this.ccrClient!.readInternalEvents(),
+        () => this.ccrClient!.readSubagentInternalEvents(),
+      )
+
+      const LIFECYCLE_TO_DELIVERY = {
+        started: 'processing',
+        completed: 'processed',
+      } as const
+      setCommandLifecycleListener((uuid, state) => {
+        this.ccrClient?.reportDelivery(uuid, LIFECYCLE_TO_DELIVERY[state])
+      })
+      setSessionStateChangedListener((state, details) => {
+        this.ccrClient?.reportState(state, details)
+      })
+      setSessionMetadataChangedListener(metadata => {
+        this.ccrClient?.reportMetadata(metadata)
+      })
+    }
+
+    // Start connection only after all callbacks are wired (setOnData above,
+    // setOnEvent inside new CCRClient() when CCR v2 is enabled).
+    void this.transport.connect()
+
+    // Push a silent keep_alive frame on a fixed interval so upstream
+    // proxies and the session-ingress layer don't GC an otherwise-idle
+    // remote control session. The keep_alive type is filtered before
+    // reaching any client UI (Query.ts drops it; structuredIO.ts drops it;
+    // web/iOS/Android never see it in their message loop). Interval comes
+    // from GrowthBook (tengu_bridge_poll_interval_config
+    // session_keepalive_interval_v2_ms, default 120s); 0 = disabled.
+    // Bridge-only: fixes Envoy idle timeout on bridge-topology sessions
+    // (#21931). byoc workers ran without this before #21931 and do not
+    // need it — different network path.
+    const keepAliveIntervalMs =
+      getPollIntervalConfig().session_keepalive_interval_v2_ms
+    if (this.isBridge && keepAliveIntervalMs > 0) {
+      this.keepAliveTimer = setInterval(() => {
+        logForDebugging('[remote-io] keep_alive sent')
+        void this.write({ type: 'keep_alive' }).catch(err => {
+          logForDebugging(
+            `[remote-io] keep_alive write failed: ${errorMessage(err)}`,
+          )
+        })
+      }, keepAliveIntervalMs)
+      this.keepAliveTimer.unref?.()
+    }
+
+    // Register for graceful shutdown cleanup
+    registerCleanup(async () => this.close())
+
+    // If initial prompt is provided, send it through the input stream
+    if (initialPrompt) {
+      // Convert the initial prompt to the input stream format.
+      // Chunks from stdin may already contain trailing newlines, so strip
+      // them before appending our own to avoid double-newline issues that
+      // cause structuredIO to parse empty lines. String() handles both
+      // string chunks and Buffer objects from process.stdin.
+      const stream = this.inputStream
+      void (async () => {
+        for await (const chunk of initialPrompt) {
+          stream.write(String(chunk).replace(/\n$/, '') + '\n')
+        }
+      })()
+    }
+  }
+
+  /** Flush the CCR v2 internal-event queue; resolves immediately when CCR v2 is disabled. */
+  override flushInternalEvents(): Promise<void> {
+    if (this.ccrClient) {
+      return this.ccrClient.flushInternalEvents()
+    }
+    return Promise.resolve()
+  }
+
+  /** Number of internal events still queued for upload; zero when CCR v2 is disabled. */
+  override get internalEventsPending(): number {
+    const client = this.ccrClient
+    return client ? client.internalEventsPending : 0
+  }
+
+  /**
+   * Send output to the transport (CCR v2 event write when enabled, plain
+   * transport write otherwise).
+   * In bridge mode, control_request messages are always echoed to stdout so the
+   * bridge parent can detect permission requests. Other messages are echoed only
+   * in debug mode.
+   */
+  async write(message: StdoutMessage): Promise<void> {
+    if (this.ccrClient) {
+      await this.ccrClient.writeEvent(message)
+    } else {
+      await this.transport.write(message)
+    }
+    const shouldEcho =
+      this.isBridge && (message.type === 'control_request' || this.isDebug)
+    if (shouldEcho) {
+      writeToStdout(ndjsonSafeStringify(message) + '\n')
+    }
+  }
+
+  /**
+   * Clean up connections gracefully: stop the keep-alive timer, close the
+   * transport, and end the input stream so downstream readers terminate.
+   */
+  close(): void {
+    const timer = this.keepAliveTimer
+    if (timer) {
+      clearInterval(timer)
+      this.keepAliveTimer = null
+    }
+    this.transport.close()
+    this.inputStream.end()
+  }
+}

+ 859 - 0
src/cli/structuredIO.ts

@@ -0,0 +1,859 @@
+import { feature } from 'bun:bundle'
+import type {
+  ElicitResult,
+  JSONRPCMessage,
+} from '@modelcontextprotocol/sdk/types.js'
+import { randomUUID } from 'crypto'
+import type { AssistantMessage } from 'src/types/message.js'
+import type {
+  HookInput,
+  HookJSONOutput,
+  PermissionUpdate,
+  SDKMessage,
+  SDKUserMessage,
+} from 'src/entrypoints/agentSdkTypes.js'
+import { SDKControlElicitationResponseSchema } from 'src/entrypoints/sdk/controlSchemas.js'
+import type {
+  SDKControlRequest,
+  SDKControlResponse,
+  StdinMessage,
+  StdoutMessage,
+} from 'src/entrypoints/sdk/controlTypes.js'
+import type { CanUseToolFn } from 'src/hooks/useCanUseTool.js'
+import type { Tool, ToolUseContext } from 'src/Tool.js'
+import { type HookCallback, hookJSONOutputSchema } from 'src/types/hooks.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import { logForDiagnosticsNoPII } from 'src/utils/diagLogs.js'
+import { AbortError } from 'src/utils/errors.js'
+import {
+  type Output as PermissionToolOutput,
+  permissionPromptToolResultToPermissionDecision,
+  outputSchema as permissionToolOutputSchema,
+} from 'src/utils/permissions/PermissionPromptToolResultSchema.js'
+import type {
+  PermissionDecision,
+  PermissionDecisionReason,
+} from 'src/utils/permissions/PermissionResult.js'
+import { hasPermissionsToUseTool } from 'src/utils/permissions/permissions.js'
+import { writeToStdout } from 'src/utils/process.js'
+import { jsonStringify } from 'src/utils/slowOperations.js'
+import { z } from 'zod/v4'
+import { notifyCommandLifecycle } from '../utils/commandLifecycle.js'
+import { normalizeControlMessageKeys } from '../utils/controlMessageCompat.js'
+import { executePermissionRequestHooks } from '../utils/hooks.js'
+import {
+  applyPermissionUpdates,
+  persistPermissionUpdates,
+} from '../utils/permissions/PermissionUpdate.js'
+import {
+  notifySessionStateChanged,
+  type RequiresActionDetails,
+  type SessionExternalMetadata,
+} from '../utils/sessionState.js'
+import { jsonParse } from '../utils/slowOperations.js'
+import { Stream } from '../utils/stream.js'
+import { ndjsonSafeStringify } from './ndjsonSafeStringify.js'
+
+/**
+ * Synthetic tool name used when forwarding sandbox network permission
+ * requests via the can_use_tool control_request protocol. SDK hosts
+ * see this as a normal tool permission prompt.
+ * Used by createSandboxAskCallback when prompting for network access.
+ */
+export const SANDBOX_NETWORK_ACCESS_TOOL_NAME = 'SandboxNetworkAccess'
+
+/**
+ * Map an internal PermissionDecisionReason to the free-form string surfaced
+ * to SDK hosts. Structural reasons (rules, modes, subcommand results,
+ * permission-prompt tools) are withheld; human-readable reasons pass
+ * through. Classifier reasons pass through only when a classifier feature
+ * flag is compiled in; otherwise they serialize to undefined.
+ */
+function serializeDecisionReason(
+  reason: PermissionDecisionReason | undefined,
+): string | undefined {
+  if (!reason) {
+    return undefined
+  }
+
+  const classifierEnabled =
+    feature('BASH_CLASSIFIER') || feature('TRANSCRIPT_CLASSIFIER')
+  if (classifierEnabled && reason.type === 'classifier') {
+    return reason.reason
+  }
+  switch (reason.type) {
+    case 'hook':
+    case 'asyncAgent':
+    case 'sandboxOverride':
+    case 'workingDir':
+    case 'safetyCheck':
+    case 'other':
+      return reason.reason
+    case 'rule':
+    case 'mode':
+    case 'subcommandResults':
+    case 'permissionPromptTool':
+      return undefined
+  }
+}
+
+/**
+ * Assemble the RequiresActionDetails payload reported for a pending
+ * permission prompt. Per-tool summary helpers may throw on malformed
+ * input; permission handling must not break because of a bad description,
+ * so any failure falls back to the bare tool name.
+ */
+function buildRequiresActionDetails(
+  tool: Tool,
+  input: Record<string, unknown>,
+  toolUseID: string,
+  requestId: string,
+): RequiresActionDetails {
+  let description = tool.name
+  try {
+    description =
+      tool.getActivityDescription?.(input) ??
+      tool.getToolUseSummary?.(input) ??
+      tool.userFacingName(input)
+  } catch {
+    // Keep the tool-name fallback set above.
+  }
+  return {
+    tool_name: tool.name,
+    action_description: description,
+    tool_use_id: toolUseID,
+    request_id: requestId,
+    input,
+  }
+}
+
+/** Bookkeeping for an in-flight control_request awaiting its control_response. */
+type PendingRequest<T> = {
+  resolve: (result: T) => void
+  reject: (error: unknown) => void
+  // Optional response validator; when absent the response payload is
+  // ignored and the request resolves with {}.
+  schema?: z.Schema
+  // The original outbound request, kept for duplicate-response tracking.
+  request: SDKControlRequest
+}
+
+// Maximum number of resolved tool_use IDs to track. Once exceeded, the oldest
+// entry is evicted. This bounds memory in very long sessions while keeping
+// enough history to catch duplicate control_response deliveries.
+const MAX_RESOLVED_TOOL_USE_IDS = 1000
+
+/**
+ * Provides a structured way to read and write SDK messages from stdio,
+ * capturing the SDK protocol.
+ */
+
+export class StructuredIO {
+  // Parsed message stream consumers iterate; produced by read().
+  readonly structuredInput: AsyncGenerator<StdinMessage | SDKMessage>
+  // In-flight control_requests keyed by request_id.
+  private readonly pendingRequests = new Map<string, PendingRequest<unknown>>()
+
+  // CCR external_metadata read back on worker start; null when the
+  // transport doesn't restore. Assigned by RemoteIO.
+  restoredWorkerState: Promise<SessionExternalMetadata | null> =
+    Promise.resolve(null)
+
+  // Set once this.input is fully consumed; sendRequest refuses new work after.
+  private inputClosed = false
+  // Handler for control_responses that match no pending request (orphans).
+  private unexpectedResponseCallback?: (
+    response: SDKControlResponse,
+  ) => Promise<void>
+
+  // Tracks tool_use IDs that have been resolved through the normal permission
+  // flow (or aborted by a hook). When a duplicate control_response arrives
+  // after the original was already handled, this Set prevents the orphan
+  // handler from re-processing it — which would push duplicate assistant
+  // messages into mutableMessages and cause a 400 "tool_use ids must be unique"
+  // error from the API.
+  private readonly resolvedToolUseIds = new Set<string>()
+  // NDJSON lines queued to be yielded ahead of the next input message.
+  private prependedLines: string[] = []
+  private onControlRequestSent?: (request: SDKControlRequest) => void
+  private onControlRequestResolved?: (requestId: string) => void
+
+  // sendRequest() and print.ts both enqueue here; the drain loop is the
+  // only writer. Prevents control_request from overtaking queued stream_events.
+  readonly outbound = new Stream<StdoutMessage>()
+
+  /**
+   * @param input raw NDJSON chunks (typically stdin)
+   * @param replayUserMessages when true, resolved control_response messages
+   *   are also re-yielded to the consumer instead of being swallowed
+   */
+  constructor(
+    private readonly input: AsyncIterable<string>,
+    private readonly replayUserMessages?: boolean,
+  ) {
+    // NOTE: `input` is already assigned by the parameter-property declaration
+    // above; the previous manual `this.input = input` was redundant.
+    this.structuredInput = this.read()
+  }
+
+  /**
+   * Records a tool_use ID as resolved so that late/duplicate control_response
+   * messages for the same tool are ignored by the orphan handler. The set is
+   * bounded at MAX_RESOLVED_TOOL_USE_IDS; the oldest entry is evicted first.
+   */
+  private trackResolvedToolUseId(request: SDKControlRequest): void {
+    if (request.request.subtype !== 'can_use_tool') {
+      return
+    }
+    const ids = this.resolvedToolUseIds
+    ids.add(request.request.tool_use_id)
+    if (ids.size > MAX_RESOLVED_TOOL_USE_IDS) {
+      // Sets iterate in insertion order, so the first value is the oldest.
+      const oldest = ids.values().next().value
+      if (oldest !== undefined) {
+        ids.delete(oldest)
+      }
+    }
+  }
+
+  /**
+   * Flush pending internal events. The stdio implementation buffers nothing,
+   * so this resolves immediately; RemoteIO overrides it with a real flush.
+   */
+  flushInternalEvents(): Promise<void> {
+    return Promise.resolve()
+  }
+
+  /**
+   * Internal-event queue depth. Always zero for the stdio implementation;
+   * RemoteIO overrides this with the real CCR v2 queue depth.
+   */
+  get internalEventsPending(): number {
+    return 0
+  }
+
+  /**
+   * Queue a user turn to be yielded before the next message from this.input.
+   * Works before iteration starts and mid-stream — read() re-checks
+   * prependedLines between each yielded message.
+   */
+  prependUserMessage(content: string): void {
+    this.prependedLines.push(
+      jsonStringify({
+        type: 'user',
+        session_id: '',
+        message: { role: 'user', content },
+        parent_tool_use_id: null,
+      } satisfies SDKUserMessage) + '\n',
+    )
+  }
+
+  /**
+   * Core stdin pump behind structuredInput: splits incoming chunks into
+   * NDJSON lines, parses each via processLine(), and yields the resulting
+   * messages. When the input ends, any trailing partial line is processed
+   * and all outstanding control_requests are rejected.
+   */
+  private async *read() {
+    let content = ''
+
+    // Called once before for-await (an empty this.input otherwise skips the
+    // loop body entirely), then again per block. prependedLines re-check is
+    // inside the while so a prepend pushed between two messages in the SAME
+    // block still lands first.
+    const splitAndProcess = async function* (this: StructuredIO) {
+      for (;;) {
+        if (this.prependedLines.length > 0) {
+          content = this.prependedLines.join('') + content
+          this.prependedLines = []
+        }
+        const newline = content.indexOf('\n')
+        if (newline === -1) break
+        const line = content.slice(0, newline)
+        content = content.slice(newline + 1)
+        const message = await this.processLine(line)
+        if (message) {
+          logForDiagnosticsNoPII('info', 'cli_stdin_message_parsed', {
+            type: message.type,
+          })
+          yield message
+        }
+      }
+    }.bind(this)
+
+    yield* splitAndProcess()
+
+    for await (const block of this.input) {
+      content += block
+      yield* splitAndProcess()
+    }
+    // Trailing partial line (input ended without a final newline).
+    if (content) {
+      const message = await this.processLine(content)
+      if (message) {
+        yield message
+      }
+    }
+    this.inputClosed = true
+    for (const request of this.pendingRequests.values()) {
+      // Reject all pending requests: the input stream closed before their
+      // responses could arrive, so they can never be resolved.
+      request.reject(
+        new Error('Tool permission stream closed before response received'),
+      )
+    }
+  }
+
+  /** Original control_request payloads for all unresolved can_use_tool prompts. */
+  getPendingPermissionRequests() {
+    const pending: SDKControlRequest[] = []
+    for (const entry of this.pendingRequests.values()) {
+      if (entry.request.request.subtype === 'can_use_tool') {
+        pending.push(entry.request)
+      }
+    }
+    return pending
+  }
+
+  /**
+   * Register a handler for control_response messages that match no pending
+   * request and whose toolUseID was not already resolved (see processLine).
+   */
+  setUnexpectedResponseCallback(
+    callback: (response: SDKControlResponse) => Promise<void>,
+  ): void {
+    this.unexpectedResponseCallback = callback
+  }
+
+  /**
+   * Inject a control_response message to resolve a pending permission request.
+   * Used by the bridge to feed permission responses from claude.ai into the
+   * SDK permission flow.
+   *
+   * Also sends a control_cancel_request to the SDK consumer so its canUseTool
+   * callback is aborted via the signal — otherwise the callback hangs.
+   */
+  injectControlResponse(response: SDKControlResponse): void {
+    const requestId = response.response?.request_id
+    if (!requestId) {
+      return
+    }
+    const pending = this.pendingRequests.get(requestId)
+    if (!pending) {
+      return
+    }
+    this.trackResolvedToolUseId(pending.request)
+    this.pendingRequests.delete(requestId)
+    // Cancel the SDK consumer's canUseTool callback — the bridge won.
+    void this.write({
+      type: 'control_cancel_request',
+      request_id: requestId,
+    })
+    if (response.response.subtype === 'error') {
+      pending.reject(new Error(response.response.error))
+      return
+    }
+    const payload = response.response.response
+    if (!pending.schema) {
+      pending.resolve({})
+      return
+    }
+    try {
+      pending.resolve(pending.schema.parse(payload))
+    } catch (error) {
+      pending.reject(error)
+    }
+  }
+
+  /**
+   * Register a callback invoked whenever a can_use_tool control_request
+   * is written to stdout. Used by the bridge to forward permission
+   * requests to claude.ai. Pass undefined to clear the callback.
+   */
+  setOnControlRequestSent(
+    callback: ((request: SDKControlRequest) => void) | undefined,
+  ): void {
+    this.onControlRequestSent = callback
+  }
+
+  /**
+   * Register a callback invoked when a can_use_tool control_response arrives
+   * from the SDK consumer (via stdin). Used by the bridge to cancel the
+   * stale permission prompt on claude.ai when the SDK consumer wins the race.
+   * Pass undefined to clear the callback.
+   */
+  setOnControlRequestResolved(
+    callback: ((requestId: string) => void) | undefined,
+  ): void {
+    this.onControlRequestResolved = callback
+  }
+
+  /**
+   * Parse one NDJSON line from stdin into a message, handling protocol
+   * messages inline: keep_alive is dropped, update_environment_variables is
+   * applied to process.env, and control_response resolves (or rejects) the
+   * matching pending request. Returns undefined for lines that should not be
+   * yielded to the consumer; exits the process on malformed input or an
+   * invalid protocol message.
+   */
+  private async processLine(
+    line: string,
+  ): Promise<StdinMessage | SDKMessage | undefined> {
+    // Skip empty lines (e.g. from double newlines in piped stdin)
+    if (!line) {
+      return undefined
+    }
+    try {
+      const message = normalizeControlMessageKeys(jsonParse(line)) as
+        | StdinMessage
+        | SDKMessage
+      if (message.type === 'keep_alive') {
+        // Silently ignore keep-alive messages
+        return undefined
+      }
+      if (message.type === 'update_environment_variables') {
+        // Apply environment variable updates directly to process.env.
+        // Used by bridge session runner for auth token refresh
+        // (CLAUDE_CODE_SESSION_ACCESS_TOKEN) which must be readable
+        // by the REPL process itself, not just child Bash commands.
+        const keys = Object.keys(message.variables)
+        for (const [key, value] of Object.entries(message.variables)) {
+          process.env[key] = value
+        }
+        logForDebugging(
+          `[structuredIO] applied update_environment_variables: ${keys.join(', ')}`,
+        )
+        return undefined
+      }
+      if (message.type === 'control_response') {
+        // Close lifecycle for every control_response, including duplicates
+        // and orphans — orphans don't yield to print.ts's main loop, so this
+        // is the only path that sees them. uuid is server-injected into the
+        // payload.
+        const uuid =
+          'uuid' in message && typeof message.uuid === 'string'
+            ? message.uuid
+            : undefined
+        if (uuid) {
+          notifyCommandLifecycle(uuid, 'completed')
+        }
+        const request = this.pendingRequests.get(message.response.request_id)
+        if (!request) {
+          // Check if this tool_use was already resolved through the normal
+          // permission flow. Duplicate control_response deliveries (e.g. from
+          // WebSocket reconnects) arrive after the original was handled, and
+          // re-processing them would push duplicate assistant messages into
+          // the conversation, causing API 400 errors.
+          const responsePayload =
+            message.response.subtype === 'success'
+              ? message.response.response
+              : undefined
+          const toolUseID = responsePayload?.toolUseID
+          if (
+            typeof toolUseID === 'string' &&
+            this.resolvedToolUseIds.has(toolUseID)
+          ) {
+            logForDebugging(
+              `Ignoring duplicate control_response for already-resolved toolUseID=${toolUseID} request_id=${message.response.request_id}`,
+            )
+            return undefined
+          }
+          if (this.unexpectedResponseCallback) {
+            await this.unexpectedResponseCallback(message)
+          }
+          return undefined // Ignore responses for requests we don't know about
+        }
+        this.trackResolvedToolUseId(request.request)
+        this.pendingRequests.delete(message.response.request_id)
+        // Notify the bridge when the SDK consumer resolves a can_use_tool
+        // request, so it can cancel the stale permission prompt on claude.ai.
+        if (
+          request.request.request.subtype === 'can_use_tool' &&
+          this.onControlRequestResolved
+        ) {
+          this.onControlRequestResolved(message.response.request_id)
+        }
+
+        if (message.response.subtype === 'error') {
+          request.reject(new Error(message.response.error))
+          return undefined
+        }
+        const result = message.response.response
+        if (request.schema) {
+          try {
+            request.resolve(request.schema.parse(result))
+          } catch (error) {
+            request.reject(error)
+          }
+        } else {
+          request.resolve({})
+        }
+        // Propagate control responses when replay is enabled
+        if (this.replayUserMessages) {
+          return message
+        }
+        return undefined
+      }
+      if (
+        message.type !== 'user' &&
+        message.type !== 'control_request' &&
+        message.type !== 'assistant' &&
+        message.type !== 'system'
+      ) {
+        logForDebugging(`Ignoring unknown message type: ${message.type}`, {
+          level: 'warn',
+        })
+        return undefined
+      }
+      if (message.type === 'control_request') {
+        if (!message.request) {
+          exitWithMessage(`Error: Missing request on control_request`)
+        }
+        return message
+      }
+      if (message.type === 'assistant' || message.type === 'system') {
+        return message
+      }
+      if (message.message.role !== 'user') {
+        exitWithMessage(
+          `Error: Expected message role 'user', got '${message.message.role}'`,
+        )
+      }
+      return message
+    } catch (error) {
+      // Malformed JSON on stdin is unrecoverable for a line-oriented protocol.
+      // biome-ignore lint/suspicious/noConsole:: intentional console output
+      console.error(`Error parsing streaming input line: ${line}: ${error}`)
+      // eslint-disable-next-line custom-rules/no-process-exit
+      process.exit(1)
+    }
+  }
+
+  /** Emit one message to stdout as a single NDJSON line. */
+  async write(message: StdoutMessage): Promise<void> {
+    const line = ndjsonSafeStringify(message)
+    writeToStdout(`${line}\n`)
+  }
+
+  /**
+   * Enqueue a control_request on the outbound stream and await its
+   * control_response (resolved by processLine or injectControlResponse).
+   * Throws immediately if the input is closed or the signal is already
+   * aborted. An abort mid-flight enqueues a control_cancel_request and
+   * rejects with AbortError without waiting for the host to acknowledge.
+   *
+   * @param request  the control_request payload to send
+   * @param schema   validator applied to the response payload
+   * @param signal   optional abort signal that cancels the request
+   * @param requestId caller-supplied id (defaults to a fresh UUID); exposed
+   *   so callers can correlate the request before it resolves
+   */
+  private async sendRequest<Response>(
+    request: SDKControlRequest['request'],
+    schema: z.Schema,
+    signal?: AbortSignal,
+    requestId: string = randomUUID(),
+  ): Promise<Response> {
+    const message: SDKControlRequest = {
+      type: 'control_request',
+      request_id: requestId,
+      request,
+    }
+    if (this.inputClosed) {
+      throw new Error('Stream closed')
+    }
+    if (signal?.aborted) {
+      throw new Error('Request aborted')
+    }
+    this.outbound.enqueue(message)
+    if (request.subtype === 'can_use_tool' && this.onControlRequestSent) {
+      this.onControlRequestSent(message)
+    }
+    const aborted = () => {
+      this.outbound.enqueue({
+        type: 'control_cancel_request',
+        request_id: requestId,
+      })
+      // Immediately reject the outstanding promise, without
+      // waiting for the host to acknowledge the cancellation.
+      const request = this.pendingRequests.get(requestId)
+      if (request) {
+        // Track the tool_use ID as resolved before rejecting, so that a
+        // late response from the host is ignored by the orphan handler.
+        this.trackResolvedToolUseId(request.request)
+        request.reject(new AbortError())
+      }
+    }
+    if (signal) {
+      signal.addEventListener('abort', aborted, {
+        once: true,
+      })
+    }
+    try {
+      return await new Promise<Response>((resolve, reject) => {
+        this.pendingRequests.set(requestId, {
+          request: {
+            type: 'control_request',
+            request_id: requestId,
+            request,
+          },
+          resolve: result => {
+            resolve(result as Response)
+          },
+          reject,
+          schema,
+        })
+      })
+    } finally {
+      // Always unhook the abort listener and drop the pending entry,
+      // whether we resolved, rejected, or aborted.
+      if (signal) {
+        signal.removeEventListener('abort', aborted)
+      }
+      this.pendingRequests.delete(requestId)
+    }
+  }
+
+  /**
+   * Build the CanUseToolFn used by the SDK entrypoint. Fast-path allows and
+   * denies are returned directly; otherwise PermissionRequest hooks race
+   * against a can_use_tool prompt sent to the SDK host, and the first
+   * decision wins. Any error in the flow degrades to a deny.
+   *
+   * @param onPermissionPrompt invoked with prompt details just before the
+   *   can_use_tool control_request is sent
+   */
+  createCanUseTool(
+    onPermissionPrompt?: (details: RequiresActionDetails) => void,
+  ): CanUseToolFn {
+    return async (
+      tool: Tool,
+      input: { [key: string]: unknown },
+      toolUseContext: ToolUseContext,
+      assistantMessage: AssistantMessage,
+      toolUseID: string,
+      forceDecision?: PermissionDecision,
+    ): Promise<PermissionDecision> => {
+      const mainPermissionResult =
+        forceDecision ??
+        (await hasPermissionsToUseTool(
+          tool,
+          input,
+          toolUseContext,
+          assistantMessage,
+          toolUseID,
+        ))
+      // If the tool is allowed or denied, return the result
+      if (
+        mainPermissionResult.behavior === 'allow' ||
+        mainPermissionResult.behavior === 'deny'
+      ) {
+        return mainPermissionResult
+      }
+
+      // Run PermissionRequest hooks in parallel with the SDK permission
+      // prompt.  In the terminal CLI, hooks race against the interactive
+      // prompt so that e.g. a hook with --delay 20 doesn't block the UI.
+      // We need the same behavior here: the SDK host (VS Code, etc.) shows
+      // its permission dialog immediately while hooks run in the background.
+      // Whichever resolves first wins; the loser is cancelled/ignored.
+
+      // AbortController used to cancel the SDK request if a hook decides first
+      const hookAbortController = new AbortController()
+      const parentSignal = toolUseContext.abortController.signal
+      // Forward parent abort to our local controller
+      const onParentAbort = () => hookAbortController.abort()
+      parentSignal.addEventListener('abort', onParentAbort, { once: true })
+
+      try {
+        // Start the hook evaluation (runs in background)
+        const hookPromise = executePermissionRequestHooksForSDK(
+          tool.name,
+          toolUseID,
+          input,
+          toolUseContext,
+          mainPermissionResult.suggestions,
+        ).then(decision => ({ source: 'hook' as const, decision }))
+
+        // Start the SDK permission prompt immediately (don't wait for hooks)
+        const requestId = randomUUID()
+        onPermissionPrompt?.(
+          buildRequiresActionDetails(tool, input, toolUseID, requestId),
+        )
+        const sdkPromise = this.sendRequest<PermissionToolOutput>(
+          {
+            subtype: 'can_use_tool',
+            tool_name: tool.name,
+            input,
+            permission_suggestions: mainPermissionResult.suggestions,
+            blocked_path: mainPermissionResult.blockedPath,
+            decision_reason: serializeDecisionReason(
+              mainPermissionResult.decisionReason,
+            ),
+            tool_use_id: toolUseID,
+            agent_id: toolUseContext.agentId,
+          },
+          permissionToolOutputSchema(),
+          hookAbortController.signal,
+          requestId,
+        ).then(result => ({ source: 'sdk' as const, result }))
+
+        // Race: hook completion vs SDK prompt response.
+        // The hook promise always resolves (never rejects), returning
+        // undefined if no hook made a decision.
+        const winner = await Promise.race([hookPromise, sdkPromise])
+
+        if (winner.source === 'hook') {
+          if (winner.decision) {
+            // Hook decided — abort the pending SDK request.
+            // Suppress the expected AbortError rejection from sdkPromise.
+            sdkPromise.catch(() => {})
+            hookAbortController.abort()
+            return winner.decision
+          }
+          // Hook passed through (no decision) — wait for the SDK prompt
+          const sdkResult = await sdkPromise
+          return permissionPromptToolResultToPermissionDecision(
+            sdkResult.result,
+            tool,
+            input,
+            toolUseContext,
+          )
+        }
+
+        // SDK prompt responded first — use its result (hook still running
+        // in background but its result will be ignored)
+        return permissionPromptToolResultToPermissionDecision(
+          winner.result,
+          tool,
+          input,
+          toolUseContext,
+        )
+      } catch (error) {
+        return permissionPromptToolResultToPermissionDecision(
+          {
+            behavior: 'deny',
+            message: `Tool permission request failed: ${error}`,
+            toolUseID,
+          },
+          tool,
+          input,
+          toolUseContext,
+        )
+      } finally {
+        // Only transition back to 'running' if no other permission prompts
+        // are pending (concurrent tool execution can have multiple in-flight).
+        if (this.getPendingPermissionRequests().length === 0) {
+          notifySessionStateChanged('running')
+        }
+        parentSignal.removeEventListener('abort', onParentAbort)
+      }
+    }
+  }
+
+  /**
+   * Build a HookCallback that proxies hook execution to the SDK consumer via
+   * a hook_callback control_request. Failures are logged and swallowed,
+   * returning an empty hook output so the hook pipeline keeps going.
+   */
+  createHookCallback(callbackId: string, timeout?: number): HookCallback {
+    const callback = async (
+      input: HookInput,
+      toolUseID: string | null,
+      abort: AbortSignal | undefined,
+    ): Promise<HookJSONOutput> => {
+      try {
+        return await this.sendRequest<HookJSONOutput>(
+          {
+            subtype: 'hook_callback',
+            callback_id: callbackId,
+            input,
+            tool_use_id: toolUseID || undefined,
+          },
+          hookJSONOutputSchema(),
+          abort,
+        )
+      } catch (error) {
+        // biome-ignore lint/suspicious/noConsole:: intentional console output
+        console.error(`Error in hook callback ${callbackId}:`, error)
+        return {}
+      }
+    }
+    return { type: 'callback', timeout, callback }
+  }
+
+  /**
+   * Sends an elicitation request to the SDK consumer and returns the response.
+   */
+  async handleElicitation(
+    serverName: string,
+    message: string,
+    requestedSchema?: Record<string, unknown>,
+    signal?: AbortSignal,
+    mode?: 'form' | 'url',
+    url?: string,
+    elicitationId?: string,
+  ): Promise<ElicitResult> {
+    try {
+      const result = await this.sendRequest<ElicitResult>(
+        {
+          subtype: 'elicitation',
+          mcp_server_name: serverName,
+          message,
+          mode,
+          url,
+          elicitation_id: elicitationId,
+          requested_schema: requestedSchema,
+        },
+        SDKControlElicitationResponseSchema(),
+        signal,
+      )
+      return result
+    } catch {
+      return { action: 'cancel' as const }
+    }
+  }
+
+  /**
+   * Creates a SandboxAskCallback that forwards sandbox network permission
+   * requests to the SDK host as can_use_tool control_requests.
+   *
+   * This piggybacks on the existing can_use_tool protocol with the synthetic
+   * SANDBOX_NETWORK_ACCESS_TOOL_NAME so that SDK hosts (VS Code, CCR, etc.)
+   * can prompt the user for network access without a new protocol subtype.
+   * Any failure (stream closed, abort, etc.) denies the connection.
+   */
+  createSandboxAskCallback(): (hostPattern: {
+    host: string
+    port?: number
+  }) => Promise<boolean> {
+    return async (hostPattern): Promise<boolean> => {
+      const { host } = hostPattern
+      try {
+        const outcome = await this.sendRequest<PermissionToolOutput>(
+          {
+            subtype: 'can_use_tool',
+            tool_name: SANDBOX_NETWORK_ACCESS_TOOL_NAME,
+            input: { host },
+            tool_use_id: randomUUID(),
+            description: `Allow network connection to ${host}?`,
+          },
+          permissionToolOutputSchema(),
+        )
+        return outcome.behavior === 'allow'
+      } catch {
+        // Deny by default when the request cannot complete.
+        return false
+      }
+    }
+  }
+
+  /**
+   * Sends an MCP message to an SDK server and waits for the response
+   */
+  async sendMcpMessage(
+    serverName: string,
+    message: JSONRPCMessage,
+  ): Promise<JSONRPCMessage> {
+    const response = await this.sendRequest<{ mcp_response: JSONRPCMessage }>(
+      {
+        subtype: 'mcp_message',
+        server_name: serverName,
+        message,
+      },
+      z.object({
+        mcp_response: z.any() as z.Schema<JSONRPCMessage>,
+      }),
+    )
+    return response.mcp_response
+  }
+}
+
+/** Print a fatal protocol error to stderr and terminate with exit code 1. */
+function exitWithMessage(message: string): never {
+  // biome-ignore lint/suspicious/noConsole:: intentional console output
+  console.error(message)
+  // eslint-disable-next-line custom-rules/no-process-exit
+  process.exit(1)
+}
+
+/**
+ * Execute PermissionRequest hooks and return a decision if one is made.
+ * Returns undefined if no hook made a decision.
+ *
+ * Allow decisions may carry updated input and "always allow" permission
+ * updates; the updates are persisted and applied to app state before the
+ * decision is returned. Deny decisions pass the hook's message through.
+ */
+async function executePermissionRequestHooksForSDK(
+  toolName: string,
+  toolUseID: string,
+  input: Record<string, unknown>,
+  toolUseContext: ToolUseContext,
+  suggestions: PermissionUpdate[] | undefined,
+): Promise<PermissionDecision | undefined> {
+  const appState = toolUseContext.getAppState()
+  const permissionMode = appState.toolPermissionContext.mode
+
+  // Iterate directly over the generator instead of using `all`
+  const hookGenerator = executePermissionRequestHooks(
+    toolName,
+    toolUseID,
+    input,
+    toolUseContext,
+    permissionMode,
+    suggestions,
+    toolUseContext.abortController.signal,
+  )
+
+  for await (const hookResult of hookGenerator) {
+    if (
+      hookResult.permissionRequestResult &&
+      (hookResult.permissionRequestResult.behavior === 'allow' ||
+        hookResult.permissionRequestResult.behavior === 'deny')
+    ) {
+      const decision = hookResult.permissionRequestResult
+      if (decision.behavior === 'allow') {
+        const finalInput = decision.updatedInput || input
+
+        // Apply permission updates if provided by hook ("always allow")
+        const permissionUpdates = decision.updatedPermissions ?? []
+        if (permissionUpdates.length > 0) {
+          persistPermissionUpdates(permissionUpdates)
+          const currentAppState = toolUseContext.getAppState()
+          const updatedContext = applyPermissionUpdates(
+            currentAppState.toolPermissionContext,
+            permissionUpdates,
+          )
+          // Update permission context via setAppState
+          toolUseContext.setAppState(prev => {
+            if (prev.toolPermissionContext === updatedContext) return prev
+            return { ...prev, toolPermissionContext: updatedContext }
+          })
+        }
+
+        return {
+          behavior: 'allow',
+          updatedInput: finalInput,
+          userModified: false,
+          decisionReason: {
+            type: 'hook',
+            hookName: 'PermissionRequest',
+          },
+        }
+      } else {
+        // Hook denied the permission
+        return {
+          behavior: 'deny',
+          message:
+            decision.message || 'Permission denied by PermissionRequest hook',
+          decisionReason: {
+            type: 'hook',
+            hookName: 'PermissionRequest',
+          },
+        }
+      }
+    }
+  }
+
+  return undefined
+}

+ 282 - 0
src/cli/transports/HybridTransport.ts

@@ -0,0 +1,282 @@
+import axios, { type AxiosError } from 'axios'
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { getSessionIngressAuthToken } from '../../utils/sessionIngressAuth.js'
+import { SerialBatchEventUploader } from './SerialBatchEventUploader.js'
+import {
+  WebSocketTransport,
+  type WebSocketTransportOptions,
+} from './WebSocketTransport.js'
+
+// stream_event delay-buffer window: content deltas accumulate for this long
+// before being enqueued as one batch (see HybridTransport.write).
+const BATCH_FLUSH_INTERVAL_MS = 100
+// Per-attempt POST timeout. Bounds how long a single stuck POST can block
+// the serialized queue. Without this, a hung connection stalls all writes.
+const POST_TIMEOUT_MS = 15_000
+// Grace period for queued writes on close(). Covers a healthy POST (~100ms)
+// plus headroom; best-effort, not a delivery guarantee under degraded network.
+// Void-ed (nothing awaits it) so this is a last resort — replBridge teardown
+// now closes AFTER archive so archive latency is the primary drain window.
+// NOTE: gracefulShutdown's cleanup budget is 2s (not the 5s outer failsafe);
+// 3s here exceeds it, but the process lives ~2s longer for hooks+analytics.
+const CLOSE_GRACE_MS = 3000
+
+/**
+ * Hybrid transport: WebSocket for reads, HTTP POST for writes.
+ *
+ * Write flow:
+ *
+ *   write(stream_event) ─┐
+ *                        │ (100ms timer)
+ *                        │
+ *                        ▼
+ *   write(other) ────► uploader.enqueue()  (SerialBatchEventUploader)
+ *                        ▲    │
+ *   writeBatch() ────────┘    │ serial, batched, retries indefinitely,
+ *                             │ backpressure at maxQueueSize
+ *                             ▼
+ *                        postOnce()  (single HTTP POST, throws on retryable)
+ *
+ * stream_event messages accumulate in streamEventBuffer for up to 100ms
+ * before enqueue (reduces POST count for high-volume content deltas). A
+ * non-stream write flushes any buffered stream_events first to preserve order.
+ *
+ * Serialization + retry + backpressure are delegated to SerialBatchEventUploader
+ * (same primitive CCR uses). At most one POST in-flight; events arriving during
+ * a POST batch into the next one. On failure, the uploader re-queues and retries
+ * with exponential backoff + jitter. If the queue fills past maxQueueSize,
+ * enqueue() blocks — giving awaiting callers backpressure.
+ *
+ * Why serialize? Bridge mode fires writes via `void transport.write()`
+ * (fire-and-forget). Without this, concurrent POSTs → concurrent Firestore
+ * writes to the same document → collisions → retry storms → pages oncall.
+ */
+export class HybridTransport extends WebSocketTransport {
+  /** HTTP events endpoint derived from the WS URL (see convertWsUrlToPostUrl). */
+  private postUrl: string
+  /** Serializes, batches, and retries all outbound POSTs — one in flight max. */
+  private uploader: SerialBatchEventUploader<StdoutMessage>
+
+  // stream_event delay buffer — accumulates content deltas for up to
+  // BATCH_FLUSH_INTERVAL_MS before enqueueing (reduces POST count)
+  private streamEventBuffer: StdoutMessage[] = []
+  private streamEventTimer: ReturnType<typeof setTimeout> | null = null
+
+  constructor(
+    url: URL,
+    headers: Record<string, string> = {},
+    sessionId?: string,
+    refreshHeaders?: () => Record<string, string>,
+    options?: WebSocketTransportOptions & {
+      maxConsecutiveFailures?: number
+      onBatchDropped?: (batchSize: number, failures: number) => void
+    },
+  ) {
+    super(url, headers, sessionId, refreshHeaders, options)
+    const { maxConsecutiveFailures, onBatchDropped } = options ?? {}
+    this.postUrl = convertWsUrlToPostUrl(url)
+    this.uploader = new SerialBatchEventUploader<StdoutMessage>({
+      // Large cap — session-ingress accepts arbitrary batch sizes. Events
+      // naturally batch during in-flight POSTs; this just bounds the payload.
+      maxBatchSize: 500,
+      // Bridge callers use `void transport.write()` — backpressure doesn't
+      // apply (they don't await). A batch >maxQueueSize deadlocks (see
+      // SerialBatchEventUploader backpressure check). So set it high enough
+      // to be a memory bound only. Wire real backpressure in a follow-up
+      // once callers await.
+      maxQueueSize: 100_000,
+      baseDelayMs: 500,
+      maxDelayMs: 8000,
+      jitterMs: 1000,
+      // Optional cap so a persistently-failing server can't pin the drain
+      // loop for the lifetime of the process. Undefined = indefinite retry.
+      // replBridge sets this; the 1P transportUtils path does not.
+      maxConsecutiveFailures,
+      onBatchDropped: (batchSize, failures) => {
+        logForDiagnosticsNoPII(
+          'error',
+          'cli_hybrid_batch_dropped_max_failures',
+          {
+            batchSize,
+            failures,
+          },
+        )
+        onBatchDropped?.(batchSize, failures)
+      },
+      send: batch => this.postOnce(batch),
+    })
+    logForDebugging(`HybridTransport: POST URL = ${this.postUrl}`)
+    logForDiagnosticsNoPII('info', 'cli_hybrid_transport_initialized')
+  }
+
+  /**
+   * Enqueue a message and wait for the queue to drain. Returning flush()
+   * preserves the contract that `await write()` resolves after the event is
+   * POSTed (relied on by tests and replBridge's initial flush). Fire-and-forget
+   * callers (`void transport.write()`) are unaffected — they don't await,
+   * so the later resolution doesn't add latency.
+   */
+  override async write(message: StdoutMessage): Promise<void> {
+    if (message.type === 'stream_event') {
+      // Delay: accumulate stream_events briefly before enqueueing.
+      // Promise resolves immediately — callers don't await stream_events.
+      this.streamEventBuffer.push(message)
+      if (!this.streamEventTimer) {
+        this.streamEventTimer = setTimeout(
+          () => this.flushStreamEvents(),
+          BATCH_FLUSH_INTERVAL_MS,
+        )
+      }
+      return
+    }
+    // Immediate: flush any buffered stream_events (ordering), then this event.
+    await this.uploader.enqueue([...this.takeStreamEvents(), message])
+    return this.uploader.flush()
+  }
+
+  /**
+   * Enqueue several messages as one uploader batch. Buffered stream_events
+   * are flushed first so ordering is preserved; resolves once the queue has
+   * drained (same contract as write()).
+   */
+  async writeBatch(messages: StdoutMessage[]): Promise<void> {
+    await this.uploader.enqueue([...this.takeStreamEvents(), ...messages])
+    return this.uploader.flush()
+  }
+
+  /** Snapshot before/after writeBatch() to detect silent drops. */
+  get droppedBatchCount(): number {
+    return this.uploader.droppedBatchCount
+  }
+
+  /**
+   * Block until all pending events are POSTed. Used by bridge's initial
+   * history flush so onStateChange('connected') fires after persistence.
+   */
+  flush(): Promise<void> {
+    void this.uploader.enqueue(this.takeStreamEvents())
+    return this.uploader.flush()
+  }
+
+  /** Take ownership of buffered stream_events and clear the delay timer. */
+  private takeStreamEvents(): StdoutMessage[] {
+    if (this.streamEventTimer) {
+      clearTimeout(this.streamEventTimer)
+      this.streamEventTimer = null
+    }
+    const buffered = this.streamEventBuffer
+    this.streamEventBuffer = []
+    return buffered
+  }
+
+  /** Delay timer fired — enqueue accumulated stream_events. */
+  private flushStreamEvents(): void {
+    this.streamEventTimer = null
+    void this.uploader.enqueue(this.takeStreamEvents())
+  }
+
+  override close(): void {
+    if (this.streamEventTimer) {
+      clearTimeout(this.streamEventTimer)
+      this.streamEventTimer = null
+    }
+    this.streamEventBuffer = []
+    // Grace period for queued writes — fallback. replBridge teardown now
+    // awaits archive between write and close (see CLOSE_GRACE_MS), so
+    // archive latency is the primary drain window and this is a last
+    // resort. Keep close() sync (returns immediately) but defer
+    // uploader.close() so any remaining queue gets a chance to finish.
+    const uploader = this.uploader
+    let graceTimer: ReturnType<typeof setTimeout> | undefined
+    void Promise.race([
+      uploader.flush(),
+      new Promise<void>(r => {
+        // eslint-disable-next-line no-restricted-syntax -- need timer ref for clearTimeout
+        graceTimer = setTimeout(r, CLOSE_GRACE_MS)
+      }),
+    ]).finally(() => {
+      clearTimeout(graceTimer)
+      uploader.close()
+    })
+    super.close()
+  }
+
+  /**
+   * Single-attempt POST. Throws on retryable failures (429, 5xx, network)
+   * so SerialBatchEventUploader re-queues and retries. Returns on success
+   * and on permanent failures (4xx non-429, no token) so the uploader moves on.
+   */
+  private async postOnce(events: StdoutMessage[]): Promise<void> {
+    const sessionToken = getSessionIngressAuthToken()
+    if (!sessionToken) {
+      logForDebugging('HybridTransport: No session token available for POST')
+      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_no_token')
+      return
+    }
+
+    const headers: Record<string, string> = {
+      Authorization: `Bearer ${sessionToken}`,
+      'Content-Type': 'application/json',
+    }
+
+    let response
+    try {
+      response = await axios.post(
+        this.postUrl,
+        { events },
+        {
+          headers,
+          validateStatus: () => true,
+          timeout: POST_TIMEOUT_MS,
+        },
+      )
+    } catch (error) {
+      const axiosError = error as AxiosError
+      logForDebugging(`HybridTransport: POST error: ${axiosError.message}`)
+      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_network_error')
+      throw error
+    }
+
+    if (response.status >= 200 && response.status < 300) {
+      logForDebugging(`HybridTransport: POST success count=${events.length}`)
+      return
+    }
+
+    // 4xx (except 429) are permanent — drop, don't retry.
+    if (
+      response.status >= 400 &&
+      response.status < 500 &&
+      response.status !== 429
+    ) {
+      logForDebugging(
+        `HybridTransport: POST returned ${response.status} (permanent), dropping`,
+      )
+      logForDiagnosticsNoPII('warn', 'cli_hybrid_post_client_error', {
+        status: response.status,
+      })
+      return
+    }
+
+    // 429 / 5xx — retryable. Throw so uploader re-queues and backs off.
+    logForDebugging(
+      `HybridTransport: POST returned ${response.status} (retryable)`,
+    )
+    logForDiagnosticsNoPII('warn', 'cli_hybrid_post_retryable_error', {
+      status: response.status,
+    })
+    throw new Error(`POST failed with ${response.status}`)
+  }
+}
+
+/**
+ * Derive the HTTP POST endpoint from a session-ingress WebSocket URL.
+ * From: wss://api.example.com/v2/session_ingress/ws/<session_id>
+ * To: https://api.example.com/v2/session_ingress/session/<session_id>/events
+ */
+function convertWsUrlToPostUrl(wsUrl: URL): string {
+  const scheme = wsUrl.protocol === 'wss:' ? 'https:' : 'http:'
+
+  // Swap the /ws/ segment for /session/, then ensure an /events suffix.
+  let path = wsUrl.pathname.replace('/ws/', '/session/')
+  if (!path.endsWith('/events')) {
+    path += path.endsWith('/') ? 'events' : '/events'
+  }
+
+  return `${scheme}//${wsUrl.host}${path}${wsUrl.search}`
+}

+ 711 - 0
src/cli/transports/SSETransport.ts

@@ -0,0 +1,711 @@
+import axios, { type AxiosError } from 'axios'
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { errorMessage } from '../../utils/errors.js'
+import { getSessionIngressAuthHeaders } from '../../utils/sessionIngressAuth.js'
+import { sleep } from '../../utils/sleep.js'
+import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
+import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
+import type { Transport } from './Transport.js'
+
+// ---------------------------------------------------------------------------
+// Configuration
+// ---------------------------------------------------------------------------
+
+// Reconnect backoff: base delay doubles per attempt, clamped at the max,
+// with ±25% jitter applied in handleConnectionError.
+const RECONNECT_BASE_DELAY_MS = 1000
+const RECONNECT_MAX_DELAY_MS = 30_000
+/** Time budget for reconnection attempts before giving up (10 minutes). */
+const RECONNECT_GIVE_UP_MS = 600_000
+/** Server sends keepalives every 15s; treat connection as dead after 45s of silence. */
+const LIVENESS_TIMEOUT_MS = 45_000
+
+/**
+ * HTTP status codes that indicate a permanent server-side rejection.
+ * The transport transitions to 'closed' immediately without retrying.
+ */
+const PERMANENT_HTTP_CODES = new Set([401, 403, 404])
+
+// POST retry configuration (matches HybridTransport)
+// NOTE(review): unlike HybridTransport there is no per-attempt POST timeout
+// constant here, and write() passes no `timeout` to axios — confirm that a
+// hung POST stalling until the OS gives up is intentional.
+const POST_MAX_RETRIES = 10
+const POST_BASE_DELAY_MS = 500
+const POST_MAX_DELAY_MS = 8000
+
+/** Hoisted TextDecoder options to avoid per-chunk allocation in readStream. */
+const STREAM_DECODE_OPTS: TextDecodeOptions = { stream: true }
+
+/** Hoisted axios validateStatus callback to avoid per-request closure allocation. */
+function alwaysValidStatus(): boolean {
+  return true
+}
+
+// ---------------------------------------------------------------------------
+// SSE Frame Parser
+// ---------------------------------------------------------------------------
+
+type SSEFrame = {
+  event?: string
+  id?: string
+  data?: string
+}
+
+/**
+ * Incrementally parse SSE frames from a text buffer.
+ * Returns parsed frames and the remaining (incomplete) buffer.
+ *
+ * @internal exported for testing
+ */
+export function parseSSEFrames(buffer: string): {
+  frames: SSEFrame[]
+  remaining: string
+} {
+  const frames: SSEFrame[] = []
+  let pos = 0
+
+  // SSE frames are delimited by double newlines
+  let idx: number
+  while ((idx = buffer.indexOf('\n\n', pos)) !== -1) {
+    const rawFrame = buffer.slice(pos, idx)
+    pos = idx + 2
+
+    // Skip empty frames
+    if (!rawFrame.trim()) continue
+
+    const frame: SSEFrame = {}
+    let isComment = false
+
+    for (const line of rawFrame.split('\n')) {
+      if (line.startsWith(':')) {
+        // SSE comment (e.g., `:keepalive`)
+        isComment = true
+        continue
+      }
+
+      const colonIdx = line.indexOf(':')
+      if (colonIdx === -1) continue
+
+      const field = line.slice(0, colonIdx)
+      // Per SSE spec, strip one leading space after colon if present
+      const value =
+        line[colonIdx + 1] === ' '
+          ? line.slice(colonIdx + 2)
+          : line.slice(colonIdx + 1)
+
+      switch (field) {
+        case 'event':
+          frame.event = value
+          break
+        case 'id':
+          frame.id = value
+          break
+        case 'data':
+          // Per SSE spec, multiple data: lines are concatenated with \n
+          frame.data = frame.data ? frame.data + '\n' + value : value
+          break
+        // Ignore other fields (retry:, etc.)
+      }
+    }
+
+    // Only emit frames that have data (or are pure comments which reset liveness)
+    if (frame.data || isComment) {
+      frames.push(frame)
+    }
+  }
+
+  return { frames, remaining: buffer.slice(pos) }
+}
+
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+
+/**
+ * Connection lifecycle. 'idle' until the first connect(); 'reconnecting'
+ * covers both an in-progress connection attempt and backoff waits;
+ * 'closing' is caller-initiated teardown via close(); 'closed' is terminal
+ * (permanent HTTP rejection or exhausted reconnect time budget).
+ */
+type SSETransportState =
+  | 'idle'
+  | 'connected'
+  | 'reconnecting'
+  | 'closing'
+  | 'closed'
+
+/**
+ * Payload for `event: client_event` frames, matching the StreamClientEvent
+ * proto message in session_stream.proto. This is the only event type sent
+ * to worker subscribers — delivery_update, session_update, ephemeral_event,
+ * and catch_up_truncated are client-channel-only (see notifier.go and
+ * event_stream.go SubscriberClient guard).
+ */
+export type StreamClientEvent = {
+  event_id: string
+  // Per-stream ordinal; readStream tracks the SSE `id:` field as the
+  // high-water mark for Last-Event-ID resumption — presumably it carries
+  // this value (confirm against the server's frame format).
+  sequence_num: number
+  event_type: string
+  source: string
+  // Inner message forwarded to onData as one NDJSON line when it has a `type`.
+  payload: Record<string, unknown>
+  created_at: string
+}
+
+// ---------------------------------------------------------------------------
+// SSETransport
+// ---------------------------------------------------------------------------
+
+/**
+ * Transport that uses SSE for reading and HTTP POST for writing.
+ *
+ * Reads events via Server-Sent Events from the CCR v2 event stream endpoint.
+ * Writes events via HTTP POST with retry logic (same pattern as HybridTransport).
+ *
+ * Each `event: client_event` frame carries a StreamClientEvent proto JSON
+ * directly in `data:`. The transport extracts `payload` and passes it to
+ * `onData` as newline-delimited JSON for StructuredIO consumers.
+ *
+ * Supports automatic reconnection with exponential backoff and Last-Event-ID
+ * for resumption after disconnection.
+ */
+export class SSETransport implements Transport {
+  private state: SSETransportState = 'idle'
+  private onData?: (data: string) => void
+  private onCloseCallback?: (closeCode?: number) => void
+  private onEventCallback?: (event: StreamClientEvent) => void
+  private headers: Record<string, string>
+  private sessionId?: string
+  private refreshHeaders?: () => Record<string, string>
+  private readonly getAuthHeaders: () => Record<string, string>
+
+  // SSE connection state
+  private abortController: AbortController | null = null
+  private lastSequenceNum = 0
+  private seenSequenceNums = new Set<number>()
+
+  // Reconnection state
+  private reconnectAttempts = 0
+  private reconnectStartTime: number | null = null
+  private reconnectTimer: NodeJS.Timeout | null = null
+
+  // Liveness detection
+  private livenessTimer: NodeJS.Timeout | null = null
+
+  // POST URL (derived from SSE URL)
+  private postUrl: string
+
+  // Runtime epoch for CCR v2 event format
+  // NOTE(review): dangling comment — no corresponding field follows; looks
+  // like leftover from a removed field. Confirm before deleting.
+
+  constructor(
+    private readonly url: URL,
+    headers: Record<string, string> = {},
+    sessionId?: string,
+    refreshHeaders?: () => Record<string, string>,
+    initialSequenceNum?: number,
+    /**
+     * Per-instance auth header source. Omit to read the process-wide
+     * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers). Required
+     * for concurrent multi-session callers — the env-var path is a process
+     * global and would stomp across sessions.
+     */
+    getAuthHeaders?: () => Record<string, string>,
+  ) {
+    this.headers = headers
+    this.sessionId = sessionId
+    this.refreshHeaders = refreshHeaders
+    this.getAuthHeaders = getAuthHeaders ?? getSessionIngressAuthHeaders
+    this.postUrl = convertSSEUrlToPostUrl(url)
+    // Seed with a caller-provided high-water mark so the first connect()
+    // sends from_sequence_num / Last-Event-ID. Without this, a fresh
+    // SSETransport always asks the server to replay from sequence 0 —
+    // the entire session history on every transport swap.
+    if (initialSequenceNum !== undefined && initialSequenceNum > 0) {
+      this.lastSequenceNum = initialSequenceNum
+    }
+    logForDebugging(`SSETransport: SSE URL = ${url.href}`)
+    logForDebugging(`SSETransport: POST URL = ${this.postUrl}`)
+    logForDiagnosticsNoPII('info', 'cli_sse_transport_initialized')
+  }
+
+  /**
+   * High-water mark of sequence numbers seen on this stream. Callers that
+   * recreate the transport (e.g. replBridge onWorkReceived) read this before
+   * close() and pass it as `initialSequenceNum` to the next instance so the
+   * server resumes from the right point instead of replaying everything.
+   */
+  getLastSequenceNum(): number {
+    return this.lastSequenceNum
+  }
+
+  /**
+   * Open (or re-open) the SSE stream. When lastSequenceNum > 0, resumption
+   * is requested via both the from_sequence_num query param and the
+   * Last-Event-ID header. Resolves once the stream ends or the attempt
+   * fails; retries are scheduled internally by handleConnectionError().
+   */
+  async connect(): Promise<void> {
+    if (this.state !== 'idle' && this.state !== 'reconnecting') {
+      logForDebugging(
+        `SSETransport: Cannot connect, current state is ${this.state}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_sse_connect_failed')
+      return
+    }
+
+    this.state = 'reconnecting'
+    const connectStartTime = Date.now()
+
+    // Build SSE URL with sequence number for resumption
+    const sseUrl = new URL(this.url.href)
+    if (this.lastSequenceNum > 0) {
+      sseUrl.searchParams.set('from_sequence_num', String(this.lastSequenceNum))
+    }
+
+    // Build headers -- use fresh auth headers (supports Cookie for session keys).
+    // Remove stale Authorization header from this.headers when Cookie auth is used,
+    // since sending both confuses the auth interceptor.
+    const authHeaders = this.getAuthHeaders()
+    const headers: Record<string, string> = {
+      ...this.headers,
+      ...authHeaders,
+      Accept: 'text/event-stream',
+      'anthropic-version': '2023-06-01',
+      'User-Agent': getClaudeCodeUserAgent(),
+    }
+    if (authHeaders['Cookie']) {
+      delete headers['Authorization']
+    }
+    if (this.lastSequenceNum > 0) {
+      headers['Last-Event-ID'] = String(this.lastSequenceNum)
+    }
+
+    logForDebugging(`SSETransport: Opening ${sseUrl.href}`)
+    logForDiagnosticsNoPII('info', 'cli_sse_connect_opening')
+
+    this.abortController = new AbortController()
+
+    try {
+      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+      const response = await fetch(sseUrl.href, {
+        headers,
+        signal: this.abortController.signal,
+      })
+
+      if (!response.ok) {
+        const isPermanent = PERMANENT_HTTP_CODES.has(response.status)
+        logForDebugging(
+          `SSETransport: HTTP ${response.status}${isPermanent ? ' (permanent)' : ''}`,
+          { level: 'error' },
+        )
+        logForDiagnosticsNoPII('error', 'cli_sse_connect_http_error', {
+          status: response.status,
+        })
+
+        if (isPermanent) {
+          this.state = 'closed'
+          this.onCloseCallback?.(response.status)
+          return
+        }
+
+        this.handleConnectionError()
+        return
+      }
+
+      if (!response.body) {
+        logForDebugging('SSETransport: No response body')
+        this.handleConnectionError()
+        return
+      }
+
+      // Successfully connected
+      const connectDuration = Date.now() - connectStartTime
+      logForDebugging('SSETransport: Connected')
+      logForDiagnosticsNoPII('info', 'cli_sse_connect_connected', {
+        duration_ms: connectDuration,
+      })
+
+      this.state = 'connected'
+      this.reconnectAttempts = 0
+      this.reconnectStartTime = null
+      this.resetLivenessTimer()
+
+      // Read the SSE stream
+      await this.readStream(response.body)
+    } catch (error) {
+      if (this.abortController?.signal.aborted) {
+        // Intentional close
+        return
+      }
+
+      logForDebugging(
+        `SSETransport: Connection error: ${errorMessage(error)}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_sse_connect_error')
+      this.handleConnectionError()
+    }
+  }
+
+  /**
+   * Read and process the SSE stream body.
+   */
+  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+  private async readStream(body: ReadableStream<Uint8Array>): Promise<void> {
+    const reader = body.getReader()
+    const decoder = new TextDecoder()
+    let buffer = ''
+
+    try {
+      while (true) {
+        const { done, value } = await reader.read()
+        if (done) break
+
+        buffer += decoder.decode(value, STREAM_DECODE_OPTS)
+        const { frames, remaining } = parseSSEFrames(buffer)
+        buffer = remaining
+
+        for (const frame of frames) {
+          // Any frame (including keepalive comments) proves the connection is alive
+          this.resetLivenessTimer()
+
+          if (frame.id) {
+            const seqNum = parseInt(frame.id, 10)
+            if (!isNaN(seqNum)) {
+              if (this.seenSequenceNums.has(seqNum)) {
+                logForDebugging(
+                  `SSETransport: DUPLICATE frame seq=${seqNum} (lastSequenceNum=${this.lastSequenceNum}, seenCount=${this.seenSequenceNums.size})`,
+                  { level: 'warn' },
+                )
+                logForDiagnosticsNoPII('warn', 'cli_sse_duplicate_sequence')
+              } else {
+                this.seenSequenceNums.add(seqNum)
+                // Prevent unbounded growth: once we have many entries, prune
+                // old sequence numbers that are well below the high-water mark.
+                // Only sequence numbers near lastSequenceNum matter for dedup.
+                if (this.seenSequenceNums.size > 1000) {
+                  const threshold = this.lastSequenceNum - 200
+                  for (const s of this.seenSequenceNums) {
+                    if (s < threshold) {
+                      this.seenSequenceNums.delete(s)
+                    }
+                  }
+                }
+              }
+              if (seqNum > this.lastSequenceNum) {
+                this.lastSequenceNum = seqNum
+              }
+            }
+          }
+
+          if (frame.event && frame.data) {
+            this.handleSSEFrame(frame.event, frame.data)
+          } else if (frame.data) {
+            // data: without event: — server is emitting the old envelope format
+            // or a bug. Log so incidents show as a signal instead of silent drops.
+            logForDebugging(
+              'SSETransport: Frame has data: but no event: field — dropped',
+              { level: 'warn' },
+            )
+            logForDiagnosticsNoPII('warn', 'cli_sse_frame_missing_event_field')
+          }
+        }
+      }
+    } catch (error) {
+      if (this.abortController?.signal.aborted) return
+      logForDebugging(
+        `SSETransport: Stream read error: ${errorMessage(error)}`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_sse_stream_read_error')
+    } finally {
+      reader.releaseLock()
+    }
+
+    // Stream ended — reconnect unless we're closing
+    if (this.state !== 'closing' && this.state !== 'closed') {
+      logForDebugging('SSETransport: Stream ended, reconnecting')
+      this.handleConnectionError()
+    }
+  }
+
+  /**
+   * Handle a single SSE frame. The event: field names the variant; data:
+   * carries the inner proto JSON directly (no envelope).
+   *
+   * Worker subscribers only receive client_event frames (see notifier.go) —
+   * any other event type indicates a server-side change that CC doesn't yet
+   * understand. Log a diagnostic so we notice in telemetry.
+   */
+  private handleSSEFrame(eventType: string, data: string): void {
+    if (eventType !== 'client_event') {
+      logForDebugging(
+        `SSETransport: Unexpected SSE event type '${eventType}' on worker stream`,
+        { level: 'warn' },
+      )
+      logForDiagnosticsNoPII('warn', 'cli_sse_unexpected_event_type', {
+        event_type: eventType,
+      })
+      return
+    }
+
+    let ev: StreamClientEvent
+    try {
+      ev = jsonParse(data) as StreamClientEvent
+    } catch (error) {
+      logForDebugging(
+        `SSETransport: Failed to parse client_event data: ${errorMessage(error)}`,
+        { level: 'error' },
+      )
+      return
+    }
+
+    const payload = ev.payload
+    if (payload && typeof payload === 'object' && 'type' in payload) {
+      const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
+      logForDebugging(
+        `SSETransport: Event seq=${ev.sequence_num} event_id=${ev.event_id} event_type=${ev.event_type} payload_type=${String(payload.type)}${sessionLabel}`,
+      )
+      logForDiagnosticsNoPII('info', 'cli_sse_message_received')
+      // Pass the unwrapped payload as newline-delimited JSON,
+      // matching the format that StructuredIO/WebSocketTransport consumers expect
+      this.onData?.(jsonStringify(payload) + '\n')
+    } else {
+      logForDebugging(
+        `SSETransport: Ignoring client_event with no type in payload: event_id=${ev.event_id}`,
+      )
+    }
+
+    this.onEventCallback?.(ev)
+  }
+
+  /**
+   * Handle connection errors with exponential backoff and time budget.
+   */
+  private handleConnectionError(): void {
+    this.clearLivenessTimer()
+
+    if (this.state === 'closing' || this.state === 'closed') return
+
+    // Abort any in-flight SSE fetch
+    this.abortController?.abort()
+    this.abortController = null
+
+    const now = Date.now()
+    if (!this.reconnectStartTime) {
+      this.reconnectStartTime = now
+    }
+
+    const elapsed = now - this.reconnectStartTime
+    if (elapsed < RECONNECT_GIVE_UP_MS) {
+      // Clear any existing timer
+      if (this.reconnectTimer) {
+        clearTimeout(this.reconnectTimer)
+        this.reconnectTimer = null
+      }
+
+      // Refresh headers before reconnecting
+      if (this.refreshHeaders) {
+        const freshHeaders = this.refreshHeaders()
+        Object.assign(this.headers, freshHeaders)
+        logForDebugging('SSETransport: Refreshed headers for reconnect')
+      }
+
+      this.state = 'reconnecting'
+      this.reconnectAttempts++
+
+      const baseDelay = Math.min(
+        RECONNECT_BASE_DELAY_MS * Math.pow(2, this.reconnectAttempts - 1),
+        RECONNECT_MAX_DELAY_MS,
+      )
+      // Add ±25% jitter
+      const delay = Math.max(
+        0,
+        baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
+      )
+
+      logForDebugging(
+        `SSETransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
+      )
+      logForDiagnosticsNoPII('error', 'cli_sse_reconnect_attempt', {
+        reconnectAttempts: this.reconnectAttempts,
+      })
+
+      this.reconnectTimer = setTimeout(() => {
+        this.reconnectTimer = null
+        void this.connect()
+      }, delay)
+    } else {
+      logForDebugging(
+        `SSETransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s`,
+        { level: 'error' },
+      )
+      logForDiagnosticsNoPII('error', 'cli_sse_reconnect_exhausted', {
+        reconnectAttempts: this.reconnectAttempts,
+        elapsedMs: elapsed,
+      })
+      this.state = 'closed'
+      this.onCloseCallback?.()
+    }
+  }
+
+  /**
+   * Bound timeout callback. Hoisted from an inline closure so that
+   * resetLivenessTimer (called per-frame) does not allocate a new closure
+   * on every SSE frame.
+   */
+  private readonly onLivenessTimeout = (): void => {
+    this.livenessTimer = null
+    logForDebugging('SSETransport: Liveness timeout, reconnecting', {
+      level: 'error',
+    })
+    logForDiagnosticsNoPII('error', 'cli_sse_liveness_timeout')
+    this.abortController?.abort()
+    this.handleConnectionError()
+  }
+
+  /**
+   * Reset the liveness timer. If no SSE frame arrives within the timeout,
+   * treat the connection as dead and reconnect.
+   */
+  private resetLivenessTimer(): void {
+    this.clearLivenessTimer()
+    this.livenessTimer = setTimeout(this.onLivenessTimeout, LIVENESS_TIMEOUT_MS)
+  }
+
+  private clearLivenessTimer(): void {
+    if (this.livenessTimer) {
+      clearTimeout(this.livenessTimer)
+      this.livenessTimer = null
+    }
+  }
+
+  // -----------------------------------------------------------------------
+  // Write (HTTP POST) — same pattern as HybridTransport
+  // -----------------------------------------------------------------------
+
+  /**
+   * POST one message to the events endpoint with bounded retries
+   * (exponential backoff up to POST_MAX_RETRIES; permanent non-429 4xx
+   * short-circuits). Never rejects — resolves on success, missing auth,
+   * permanent client error, and exhausted retries alike.
+   * NOTE(review): unlike HybridTransport.postOnce, this axios.post sets no
+   * `timeout`, so a hung connection can pin an attempt until the OS gives
+   * up — confirm whether that is intentional.
+   */
+  async write(message: StdoutMessage): Promise<void> {
+    const authHeaders = this.getAuthHeaders()
+    if (Object.keys(authHeaders).length === 0) {
+      logForDebugging('SSETransport: No session token available for POST')
+      logForDiagnosticsNoPII('warn', 'cli_sse_post_no_token')
+      return
+    }
+
+    const headers: Record<string, string> = {
+      ...authHeaders,
+      'Content-Type': 'application/json',
+      'anthropic-version': '2023-06-01',
+      'User-Agent': getClaudeCodeUserAgent(),
+    }
+
+    logForDebugging(
+      `SSETransport: POST body keys=${Object.keys(message as Record<string, unknown>).join(',')}`,
+    )
+
+    for (let attempt = 1; attempt <= POST_MAX_RETRIES; attempt++) {
+      try {
+        const response = await axios.post(this.postUrl, message, {
+          headers,
+          validateStatus: alwaysValidStatus,
+        })
+
+        if (response.status === 200 || response.status === 201) {
+          logForDebugging(`SSETransport: POST success type=${message.type}`)
+          return
+        }
+
+        logForDebugging(
+          `SSETransport: POST ${response.status} body=${jsonStringify(response.data).slice(0, 200)}`,
+        )
+        // 4xx errors (except 429) are permanent - don't retry
+        if (
+          response.status >= 400 &&
+          response.status < 500 &&
+          response.status !== 429
+        ) {
+          logForDebugging(
+            `SSETransport: POST returned ${response.status} (client error), not retrying`,
+          )
+          logForDiagnosticsNoPII('warn', 'cli_sse_post_client_error', {
+            status: response.status,
+          })
+          return
+        }
+
+        // 429 or 5xx - retry
+        logForDebugging(
+          `SSETransport: POST returned ${response.status}, attempt ${attempt}/${POST_MAX_RETRIES}`,
+        )
+        logForDiagnosticsNoPII('warn', 'cli_sse_post_retryable_error', {
+          status: response.status,
+          attempt,
+        })
+      } catch (error) {
+        const axiosError = error as AxiosError
+        logForDebugging(
+          `SSETransport: POST error: ${axiosError.message}, attempt ${attempt}/${POST_MAX_RETRIES}`,
+        )
+        logForDiagnosticsNoPII('warn', 'cli_sse_post_network_error', {
+          attempt,
+        })
+      }
+
+      if (attempt === POST_MAX_RETRIES) {
+        logForDebugging(
+          `SSETransport: POST failed after ${POST_MAX_RETRIES} attempts, continuing`,
+        )
+        logForDiagnosticsNoPII('warn', 'cli_sse_post_retries_exhausted')
+        return
+      }
+
+      const delayMs = Math.min(
+        POST_BASE_DELAY_MS * Math.pow(2, attempt - 1),
+        POST_MAX_DELAY_MS,
+      )
+      await sleep(delayMs)
+    }
+  }
+
+  // -----------------------------------------------------------------------
+  // Transport interface
+  // -----------------------------------------------------------------------
+
+  /** True only while the SSE stream is open and being read. */
+  isConnectedStatus(): boolean {
+    return this.state === 'connected'
+  }
+
+  /** True once the transport reached the terminal 'closed' state. */
+  isClosedStatus(): boolean {
+    return this.state === 'closed'
+  }
+
+  /** Consumer for unwrapped event payloads, delivered as NDJSON lines. */
+  setOnData(callback: (data: string) => void): void {
+    this.onData = callback
+  }
+
+  /** Invoked on terminal close (permanent HTTP status or exhausted reconnect budget). */
+  setOnClose(callback: (closeCode?: number) => void): void {
+    this.onCloseCallback = callback
+  }
+
+  /** Raw StreamClientEvent observer; fires for every parsed client_event frame. */
+  setOnEvent(callback: (event: StreamClientEvent) => void): void {
+    this.onEventCallback = callback
+  }
+
+  /**
+   * Caller-initiated teardown: cancel reconnect + liveness timers and abort
+   * any in-flight SSE fetch. Leaves state at 'closing' and does not invoke
+   * the onClose callback.
+   */
+  close(): void {
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer)
+      this.reconnectTimer = null
+    }
+    this.clearLivenessTimer()
+
+    this.state = 'closing'
+    this.abortController?.abort()
+    this.abortController = null
+  }
+}
+
+// ---------------------------------------------------------------------------
+// URL Conversion
+// ---------------------------------------------------------------------------
+
+/**
+ * Convert an SSE URL to the HTTP POST endpoint URL.
+ * The SSE stream URL and POST URL share the same base; the POST endpoint
+ * is at `/events` (without `/stream`).
+ *
+ * From: https://api.example.com/v2/session_ingress/session/<session_id>/events/stream
+ * To:   https://api.example.com/v2/session_ingress/session/<session_id>/events
+ */
+function convertSSEUrlToPostUrl(sseUrl: URL): string {
+  const suffix = '/stream'
+  // Trim the trailing /stream segment (if any) to reach the events endpoint.
+  const basePath = sseUrl.pathname.endsWith(suffix)
+    ? sseUrl.pathname.slice(0, -suffix.length)
+    : sseUrl.pathname
+  return `${sseUrl.protocol}//${sseUrl.host}${basePath}`
+}

+ 275 - 0
src/cli/transports/SerialBatchEventUploader.ts

@@ -0,0 +1,275 @@
+import { jsonStringify } from '../../utils/slowOperations.js'
+
+/**
+ * Serial ordered event uploader with batching, retry, and backpressure.
+ *
+ * - enqueue() adds events to a pending buffer
+ * - At most 1 POST in-flight at a time
+ * - Drains up to maxBatchSize items per POST
+ * - New events accumulate while in-flight
+ * - On failure: exponential backoff (clamped), retries indefinitely
+ *   until success or close() — unless maxConsecutiveFailures is set,
+ *   in which case the failing batch is dropped and drain advances
+ * - flush() blocks until pending is empty and kicks drain if needed
+ * - Backpressure: enqueue() blocks when maxQueueSize is reached
+ */
+
+/**
+ * Throw from config.send() to make the uploader wait a server-supplied
+ * duration before retrying (e.g. 429 with Retry-After). When retryAfterMs
+ * is set, it overrides exponential backoff for that attempt — clamped to
+ * [baseDelayMs, maxDelayMs] and jittered so a misbehaving server can
+ * neither hot-loop nor stall the client, and many sessions sharing a rate
+ * limit don't all pounce at the same instant. Without retryAfterMs, behaves
+ * like any other thrown error (exponential backoff).
+ */
+export class RetryableError extends Error {
+  constructor(
+    message: string,
+    readonly retryAfterMs?: number,
+  ) {
+    super(message)
+  }
+}
+
type SerialBatchEventUploaderConfig<T> = {
  /** Max items per POST (1 = no batching) */
  maxBatchSize: number
  /**
   * Max serialized bytes per POST. First item always goes in regardless of
   * size; subsequent items only if cumulative JSON bytes stay under this.
   * Undefined = no byte limit (count-only batching).
   */
  maxBatchBytes?: number
  /** Max pending items before enqueue() blocks */
  maxQueueSize: number
  /** The actual HTTP call — caller controls payload format. Throw
   *  RetryableError to request a server-directed retry delay instead of
   *  plain exponential backoff. */
  send: (batch: T[]) => Promise<void>
  /** Base delay for exponential backoff (ms) */
  baseDelayMs: number
  /** Max delay cap (ms) */
  maxDelayMs: number
  /** Random jitter range added to retry delay (ms) */
  jitterMs: number
  /**
   * After this many consecutive send() failures, drop the failing batch
   * and move on to the next pending item with a fresh failure budget.
   * Undefined = retry indefinitely (default).
   */
  maxConsecutiveFailures?: number
  /** Called when a batch is dropped for hitting maxConsecutiveFailures. */
  onBatchDropped?: (batchSize: number, failures: number) => void
}
+
+export class SerialBatchEventUploader<T> {
+  private pending: T[] = []
+  private pendingAtClose = 0
+  private draining = false
+  private closed = false
+  private backpressureResolvers: Array<() => void> = []
+  private sleepResolve: (() => void) | null = null
+  private flushResolvers: Array<() => void> = []
+  private droppedBatches = 0
+  private readonly config: SerialBatchEventUploaderConfig<T>
+
+  constructor(config: SerialBatchEventUploaderConfig<T>) {
+    this.config = config
+  }
+
+  /**
+   * Monotonic count of batches dropped via maxConsecutiveFailures. Callers
+   * can snapshot before flush() and compare after to detect silent drops
+   * (flush() resolves normally even when batches were dropped).
+   */
+  get droppedBatchCount(): number {
+    return this.droppedBatches
+  }
+
+  /**
+   * Pending queue depth. After close(), returns the count at close time —
+   * close() clears the queue but shutdown diagnostics may read this after.
+   */
+  get pendingCount(): number {
+    return this.closed ? this.pendingAtClose : this.pending.length
+  }
+
+  /**
+   * Add events to the pending buffer. Returns immediately if space is
+   * available. Blocks (awaits) if the buffer is full — caller pauses
+   * until drain frees space.
+   */
+  async enqueue(events: T | T[]): Promise<void> {
+    if (this.closed) return
+    const items = Array.isArray(events) ? events : [events]
+    if (items.length === 0) return
+
+    // Backpressure: wait until there's space
+    while (
+      this.pending.length + items.length > this.config.maxQueueSize &&
+      !this.closed
+    ) {
+      await new Promise<void>(resolve => {
+        this.backpressureResolvers.push(resolve)
+      })
+    }
+
+    if (this.closed) return
+    this.pending.push(...items)
+    void this.drain()
+  }
+
+  /**
+   * Block until all pending events have been sent.
+   * Used at turn boundaries and graceful shutdown.
+   */
+  flush(): Promise<void> {
+    if (this.pending.length === 0 && !this.draining) {
+      return Promise.resolve()
+    }
+    void this.drain()
+    return new Promise<void>(resolve => {
+      this.flushResolvers.push(resolve)
+    })
+  }
+
+  /**
+   * Drop pending events and stop processing.
+   * Resolves any blocked enqueue() and flush() callers.
+   */
+  close(): void {
+    if (this.closed) return
+    this.closed = true
+    this.pendingAtClose = this.pending.length
+    this.pending = []
+    this.sleepResolve?.()
+    this.sleepResolve = null
+    for (const resolve of this.backpressureResolvers) resolve()
+    this.backpressureResolvers = []
+    for (const resolve of this.flushResolvers) resolve()
+    this.flushResolvers = []
+  }
+
+  /**
+   * Drain loop. At most one instance runs at a time (guarded by this.draining).
+   * Sends batches serially. On failure, backs off and retries indefinitely.
+   */
+  private async drain(): Promise<void> {
+    if (this.draining || this.closed) return
+    this.draining = true
+    let failures = 0
+
+    try {
+      while (this.pending.length > 0 && !this.closed) {
+        const batch = this.takeBatch()
+        if (batch.length === 0) continue
+
+        try {
+          await this.config.send(batch)
+          failures = 0
+        } catch (err) {
+          failures++
+          if (
+            this.config.maxConsecutiveFailures !== undefined &&
+            failures >= this.config.maxConsecutiveFailures
+          ) {
+            this.droppedBatches++
+            this.config.onBatchDropped?.(batch.length, failures)
+            failures = 0
+            this.releaseBackpressure()
+            continue
+          }
+          // Re-queue the failed batch at the front. Use concat (single
+          // allocation) instead of unshift(...batch) which shifts every
+          // pending item batch.length times. Only hit on failure path.
+          this.pending = batch.concat(this.pending)
+          const retryAfterMs =
+            err instanceof RetryableError ? err.retryAfterMs : undefined
+          await this.sleep(this.retryDelay(failures, retryAfterMs))
+          continue
+        }
+
+        // Release backpressure waiters if space opened up
+        this.releaseBackpressure()
+      }
+    } finally {
+      this.draining = false
+      // Notify flush waiters if queue is empty
+      if (this.pending.length === 0) {
+        for (const resolve of this.flushResolvers) resolve()
+        this.flushResolvers = []
+      }
+    }
+  }
+
+  /**
+   * Pull the next batch from pending. Respects both maxBatchSize and
+   * maxBatchBytes. The first item is always taken; subsequent items only
+   * if adding them keeps the cumulative JSON size under maxBatchBytes.
+   *
+   * Un-serializable items (BigInt, circular refs, throwing toJSON) are
+   * dropped in place — they can never be sent and leaving them at
+   * pending[0] would poison the queue and hang flush() forever.
+   */
+  private takeBatch(): T[] {
+    const { maxBatchSize, maxBatchBytes } = this.config
+    if (maxBatchBytes === undefined) {
+      return this.pending.splice(0, maxBatchSize)
+    }
+    let bytes = 0
+    let count = 0
+    while (count < this.pending.length && count < maxBatchSize) {
+      let itemBytes: number
+      try {
+        itemBytes = Buffer.byteLength(jsonStringify(this.pending[count]))
+      } catch {
+        this.pending.splice(count, 1)
+        continue
+      }
+      if (count > 0 && bytes + itemBytes > maxBatchBytes) break
+      bytes += itemBytes
+      count++
+    }
+    return this.pending.splice(0, count)
+  }
+
+  private retryDelay(failures: number, retryAfterMs?: number): number {
+    const jitter = Math.random() * this.config.jitterMs
+    if (retryAfterMs !== undefined) {
+      // Jitter on top of the server's hint prevents thundering herd when
+      // many sessions share a rate limit and all receive the same
+      // Retry-After. Clamp first, then spread — same shape as the
+      // exponential path (effective ceiling is maxDelayMs + jitterMs).
+      const clamped = Math.max(
+        this.config.baseDelayMs,
+        Math.min(retryAfterMs, this.config.maxDelayMs),
+      )
+      return clamped + jitter
+    }
+    const exponential = Math.min(
+      this.config.baseDelayMs * 2 ** (failures - 1),
+      this.config.maxDelayMs,
+    )
+    return exponential + jitter
+  }
+
+  private releaseBackpressure(): void {
+    const resolvers = this.backpressureResolvers
+    this.backpressureResolvers = []
+    for (const resolve of resolvers) resolve()
+  }
+
+  private sleep(ms: number): Promise<void> {
+    return new Promise(resolve => {
+      this.sleepResolve = resolve
+      setTimeout(
+        (self, resolve) => {
+          self.sleepResolve = null
+          resolve()
+        },
+        ms,
+        this,
+        resolve,
+      )
+    })
+  }
+}

+ 800 - 0
src/cli/transports/WebSocketTransport.ts

@@ -0,0 +1,800 @@
+import type { StdoutMessage } from 'src/entrypoints/sdk/controlTypes.js'
+import type WsWebSocket from 'ws'
+import { logEvent } from '../../services/analytics/index.js'
+import { CircularBuffer } from '../../utils/CircularBuffer.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+import { getWebSocketTLSOptions } from '../../utils/mtls.js'
+import {
+  getWebSocketProxyAgent,
+  getWebSocketProxyUrl,
+} from '../../utils/proxy.js'
+import {
+  registerSessionActivityCallback,
+  unregisterSessionActivityCallback,
+} from '../../utils/sessionActivity.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import type { Transport } from './Transport.js'
+
// Pre-serialized keep_alive data frame (newline-delimited JSON).
const KEEP_ALIVE_FRAME = '{"type":"keep_alive"}\n'

// Replay buffer capacity: messages retained for redelivery after reconnect.
const DEFAULT_MAX_BUFFER_SIZE = 1000
// Exponential reconnect backoff: initial delay and cap (ms).
const DEFAULT_BASE_RECONNECT_DELAY = 1000
const DEFAULT_MAX_RECONNECT_DELAY = 30000
/** Time budget for reconnection attempts before giving up (10 minutes). */
const DEFAULT_RECONNECT_GIVE_UP_MS = 600_000
// Ping cadence for dead-connection detection (ms).
const DEFAULT_PING_INTERVAL = 10000
const DEFAULT_KEEPALIVE_INTERVAL = 300_000 // 5 minutes

/**
 * Threshold for detecting system sleep/wake. If the gap between consecutive
 * reconnection attempts exceeds this, the machine likely slept. We reset
 * the reconnection budget and retry — the server will reject with permanent
 * close codes (4001/1002) if the session was reaped during sleep.
 */
const SLEEP_DETECTION_THRESHOLD_MS = DEFAULT_MAX_RECONNECT_DELAY * 2 // 60s

/**
 * WebSocket close codes that indicate a permanent server-side rejection.
 * The transport transitions to 'closed' immediately without retrying.
 */
const PERMANENT_CLOSE_CODES = new Set([
  1002, // protocol error — server rejected handshake (e.g. session reaped)
  4001, // session expired / not found
  4003, // unauthorized
])

export type WebSocketTransportOptions = {
  /** When false, the transport does not attempt automatic reconnection on
   *  disconnect. Use this when the caller has its own recovery mechanism
   *  (e.g. the REPL bridge poll loop). Defaults to true. */
  autoReconnect?: boolean
  /** Gates the tengu_ws_transport_* telemetry events. Set true at the
   *  REPL-bridge construction site so only Remote Control sessions (the
   *  Cloudflare-idle-timeout population) emit; print-mode workers stay
   *  silent. Defaults to false. */
  isBridge?: boolean
}

// Lifecycle states; transitions are driven by connect() / handleOpenEvent()
// / handleConnectionError() / close().
type WebSocketTransportState =
  | 'idle'
  | 'connected'
  | 'reconnecting'
  | 'closing'
  | 'closed'

// Common interface between globalThis.WebSocket and ws.WebSocket
type WebSocketLike = {
  close(): void
  send(data: string): void
  ping?(): void // Bun & ws both support this
}
+
+export class WebSocketTransport implements Transport {
+  private ws: WebSocketLike | null = null
+  private lastSentId: string | null = null
+  protected url: URL
+  protected state: WebSocketTransportState = 'idle'
+  protected onData?: (data: string) => void
+  private onCloseCallback?: (closeCode?: number) => void
+  private onConnectCallback?: () => void
+  private headers: Record<string, string>
+  private sessionId?: string
+  private autoReconnect: boolean
+  private isBridge: boolean
+
+  // Reconnection state
+  private reconnectAttempts = 0
+  private reconnectStartTime: number | null = null
+  private reconnectTimer: NodeJS.Timeout | null = null
+  private lastReconnectAttemptTime: number | null = null
+  // Wall-clock of last WS data-frame activity (inbound message or outbound
+  // ws.send). Used to compute idle time at close — the signal for diagnosing
+  // proxy idle-timeout RSTs (e.g. Cloudflare 5-min). Excludes ping/pong
+  // control frames (proxies don't count those).
+  private lastActivityTime = 0
+
+  // Ping interval for connection health checks
+  private pingInterval: NodeJS.Timeout | null = null
+  private pongReceived = true
+
+  // Periodic keep_alive data frames to reset proxy idle timers
+  private keepAliveInterval: NodeJS.Timeout | null = null
+
+  // Message buffering for replay on reconnection
+  private messageBuffer: CircularBuffer<StdoutMessage>
+  // Track which runtime's WS we're using so we can detach listeners
+  // with the matching API (removeEventListener vs. off).
+  private isBunWs = false
+
+  // Captured at connect() time for handleOpenEvent timing. Stored as an
+  // instance field so the onOpen handler can be a stable class-property
+  // arrow function (removable in doDisconnect) instead of a closure over
+  // a local variable.
+  private connectStartTime = 0
+
+  private refreshHeaders?: () => Record<string, string>
+
  /**
   * @param url WebSocket endpoint to connect to.
   * @param headers Static headers sent with the upgrade request.
   * @param sessionId Optional session label used in debug log lines.
   * @param refreshHeaders When provided, called before reconnect attempts
   *   (and on a 4003 close) to obtain fresh headers, e.g. a new token.
   * @param options Reconnect/telemetry behavior; see WebSocketTransportOptions.
   */
  constructor(
    url: URL,
    headers: Record<string, string> = {},
    sessionId?: string,
    refreshHeaders?: () => Record<string, string>,
    options?: WebSocketTransportOptions,
  ) {
    this.url = url
    this.headers = headers
    this.sessionId = sessionId
    this.refreshHeaders = refreshHeaders
    this.autoReconnect = options?.autoReconnect ?? true
    this.isBridge = options?.isBridge ?? false
    this.messageBuffer = new CircularBuffer(DEFAULT_MAX_BUFFER_SIZE)
  }
+
  /**
   * Open the WebSocket. Valid only from 'idle' (first connect) or
   * 'reconnecting' (scheduled retry); any other state is logged and ignored.
   * Picks the runtime socket implementation: Bun's built-in WebSocket
   * (supports headers/proxy/tls constructor options) or the `ws` package
   * under Node, and attaches the matching set of event handlers.
   */
  public async connect(): Promise<void> {
    if (this.state !== 'idle' && this.state !== 'reconnecting') {
      logForDebugging(
        `WebSocketTransport: Cannot connect, current state is ${this.state}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_connect_failed')
      return
    }
    this.state = 'reconnecting'

    this.connectStartTime = Date.now()
    logForDebugging(`WebSocketTransport: Opening ${this.url.href}`)
    logForDiagnosticsNoPII('info', 'cli_websocket_connect_opening')

    // Start with provided headers and add runtime headers
    const headers = { ...this.headers }
    if (this.lastSentId) {
      // Tells the server the last message we sent, so it can report what it
      // actually received (used for replay after reconnect).
      headers['X-Last-Request-Id'] = this.lastSentId
      logForDebugging(
        `WebSocketTransport: Adding X-Last-Request-Id header: ${this.lastSentId}`,
      )
    }

    if (typeof Bun !== 'undefined') {
      // Bun's WebSocket supports headers/proxy options but the DOM typings don't
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      const ws = new globalThis.WebSocket(this.url.href, {
        headers,
        proxy: getWebSocketProxyUrl(this.url.href),
        tls: getWebSocketTLSOptions() || undefined,
      } as unknown as string[])
      this.ws = ws
      this.isBunWs = true

      ws.addEventListener('open', this.onBunOpen)
      ws.addEventListener('message', this.onBunMessage)
      ws.addEventListener('error', this.onBunError)
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      ws.addEventListener('close', this.onBunClose)
      // 'pong' is Bun-specific — not in DOM typings.
      ws.addEventListener('pong', this.onPong)
    } else {
      const { default: WS } = await import('ws')
      const ws = new WS(this.url.href, {
        headers,
        agent: getWebSocketProxyAgent(this.url.href),
        ...getWebSocketTLSOptions(),
      })
      this.ws = ws
      this.isBunWs = false

      ws.on('open', this.onNodeOpen)
      ws.on('message', this.onNodeMessage)
      ws.on('error', this.onNodeError)
      ws.on('close', this.onNodeClose)
      ws.on('pong', this.onPong)
    }
  }
+
  // --- Bun (native WebSocket) event handlers ---
  // Stored as class-property arrow functions so they can be removed in
  // doDisconnect(). Without removal, each reconnect orphans the old WS
  // object + its 5 closures until GC, which accumulates under network
  // instability. Mirrors the pattern in src/utils/mcpWebSocketTransport.ts.

  /** Bun: connection established. */
  private onBunOpen = () => {
    this.handleOpenEvent()
    // Bun's WebSocket doesn't expose upgrade response headers,
    // so replay all buffered messages. The server deduplicates by UUID.
    if (this.lastSentId) {
      this.replayBufferedMessages('')
    }
  }
+
+  private onBunMessage = (event: MessageEvent) => {
+    const message =
+      typeof event.data === 'string' ? event.data : String(event.data)
+    this.lastActivityTime = Date.now()
+    logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
+      length: message.length,
+    })
+    if (this.onData) {
+      this.onData(message)
+    }
+  }
+
  /** Bun: socket error. Cleanup is deferred to the subsequent close event. */
  private onBunError = () => {
    logForDebugging('WebSocketTransport: Error', {
      level: 'error',
    })
    logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
    // close event fires after error — let it call handleConnectionError
  }
+
+  // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
+  private onBunClose = (event: CloseEvent) => {
+    const isClean = event.code === 1000 || event.code === 1001
+    logForDebugging(
+      `WebSocketTransport: Closed: ${event.code}`,
+      isClean ? undefined : { level: 'error' },
+    )
+    logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
+    this.handleConnectionError(event.code)
+  }
+
  // --- Node (ws package) event handlers ---

  /** Node: connection established. */
  private onNodeOpen = () => {
    // Capture ws before handleOpenEvent() invokes onConnectCallback — if the
    // callback synchronously closes the transport, this.ws becomes null.
    // The old inline-closure code had this safety implicitly via closure capture.
    const ws = this.ws
    this.handleOpenEvent()
    if (!ws) return
    // Check for last-id in upgrade response headers (ws package only)
    const nws = ws as unknown as WsWebSocket & {
      upgradeReq?: { headers?: Record<string, string> }
    }
    const upgradeResponse = nws.upgradeReq
    if (upgradeResponse?.headers?.['x-last-request-id']) {
      // Server-confirmed last message id: replay only what it missed.
      const serverLastId = upgradeResponse.headers['x-last-request-id']
      this.replayBufferedMessages(serverLastId)
    }
  }
+
+  private onNodeMessage = (data: Buffer) => {
+    const message = data.toString()
+    this.lastActivityTime = Date.now()
+    logForDiagnosticsNoPII('info', 'cli_websocket_message_received', {
+      length: message.length,
+    })
+    if (this.onData) {
+      this.onData(message)
+    }
+  }
+
  /** Node: socket error. Cleanup is deferred to the subsequent close event. */
  private onNodeError = (err: Error) => {
    logForDebugging(`WebSocketTransport: Error: ${err.message}`, {
      level: 'error',
    })
    logForDiagnosticsNoPII('error', 'cli_websocket_connect_error')
    // close event fires after error — let it call handleConnectionError
  }
+
+  private onNodeClose = (code: number, _reason: Buffer) => {
+    const isClean = code === 1000 || code === 1001
+    logForDebugging(
+      `WebSocketTransport: Closed: ${code}`,
+      isClean ? undefined : { level: 'error' },
+    )
+    logForDiagnosticsNoPII('error', 'cli_websocket_connect_closed')
+    this.handleConnectionError(code)
+  }
+
  // --- Shared handlers ---

  /** Marks the most recent ping as answered. */
  private onPong = () => {
    this.pongReceived = true
  }
+
  /**
   * Shared open handler for both runtimes: records connect telemetry,
   * resets reconnection bookkeeping, flips state to 'connected', notifies
   * the consumer, then starts the ping / keep_alive timers.
   */
  private handleOpenEvent(): void {
    const connectDuration = Date.now() - this.connectStartTime
    logForDebugging('WebSocketTransport: Connected')
    logForDiagnosticsNoPII('info', 'cli_websocket_connect_connected', {
      duration_ms: connectDuration,
    })

    // Reconnect success — capture attempt count + downtime before resetting.
    // reconnectStartTime is null on first connect, non-null on reopen.
    if (this.isBridge && this.reconnectStartTime !== null) {
      logEvent('tengu_ws_transport_reconnected', {
        attempts: this.reconnectAttempts,
        downtimeMs: Date.now() - this.reconnectStartTime,
      })
    }

    this.reconnectAttempts = 0
    this.reconnectStartTime = null
    this.lastReconnectAttemptTime = null
    this.lastActivityTime = Date.now()
    this.state = 'connected'
    this.onConnectCallback?.()

    // Start periodic pings to detect dead connections
    this.startPingInterval()

    // Start periodic keep_alive data frames to reset proxy idle timers
    this.startKeepaliveInterval()

    // Register callback for session activity signals.
    // NOTE(review): registered on every (re)open — assumes a single-slot
    // registry (the prior callback is unregistered in doDisconnect); verify.
    registerSessionActivityCallback(() => {
      void this.write({ type: 'keep_alive' })
    })
  }
+
  /**
   * Send one already-serialized line over the socket. Returns false instead
   * of throwing; a thrown send error additionally routes through
   * handleConnectionError so reconnection logic runs.
   */
  protected sendLine(line: string): boolean {
    if (!this.ws || this.state !== 'connected') {
      logForDebugging('WebSocketTransport: Not connected')
      logForDiagnosticsNoPII('info', 'cli_websocket_send_not_connected')
      return false
    }

    try {
      this.ws.send(line)
      // Outbound data frame counts as activity for idle diagnostics.
      this.lastActivityTime = Date.now()
      return true
    } catch (error) {
      logForDebugging(`WebSocketTransport: Failed to send: ${error}`, {
        level: 'error',
      })
      logForDiagnosticsNoPII('error', 'cli_websocket_send_error')
      // Don't null this.ws here — let doDisconnect() (via handleConnectionError)
      // handle cleanup so listeners are removed before the WS is released.
      this.handleConnectionError()
      return false
    }
  }
+
  /**
   * Remove all listeners attached in connect() for the given WebSocket.
   * Without this, each reconnect orphans the old WS object + its closures
   * until GC — these accumulate under network instability. Mirrors the
   * pattern in src/utils/mcpWebSocketTransport.ts.
   *
   * @param ws The socket whose listeners were attached by connect();
   *   this.isBunWs selects the matching removal API.
   */
  private removeWsListeners(ws: WebSocketLike): void {
    if (this.isBunWs) {
      const nws = ws as unknown as globalThis.WebSocket
      nws.removeEventListener('open', this.onBunOpen)
      nws.removeEventListener('message', this.onBunMessage)
      nws.removeEventListener('error', this.onBunError)
      // eslint-disable-next-line eslint-plugin-n/no-unsupported-features/node-builtins
      nws.removeEventListener('close', this.onBunClose)
      // 'pong' is Bun-specific — not in DOM typings
      nws.removeEventListener('pong' as 'message', this.onPong)
    } else {
      const nws = ws as unknown as WsWebSocket
      nws.off('open', this.onNodeOpen)
      nws.off('message', this.onNodeMessage)
      nws.off('error', this.onNodeError)
      nws.off('close', this.onNodeClose)
      nws.off('pong', this.onPong)
    }
  }
+
+  protected doDisconnect(): void {
+    // Stop pinging and keepalive when disconnecting
+    this.stopPingInterval()
+    this.stopKeepaliveInterval()
+
+    // Unregister session activity callback
+    unregisterSessionActivityCallback()
+
+    if (this.ws) {
+      // Remove listeners BEFORE close() so the old WS + closures can be
+      // GC'd promptly instead of lingering until the next mark-and-sweep.
+      this.removeWsListeners(this.ws)
+      this.ws.close()
+      this.ws = null
+    }
+  }
+
  /**
   * Central disconnect handler. Tears down the current socket, then chooses
   * one of four outcomes: permanent close (no retry), caller-managed
   * recovery (autoReconnect=false), a scheduled reconnect with exponential
   * backoff + jitter, or giving up once the time budget is exhausted.
   * Also refreshes auth headers on 4003 and resets the budget after a
   * detected system sleep.
   *
   * @param closeCode WebSocket close code, when the disconnect came from a
   *   close event (undefined for send failures).
   */
  private handleConnectionError(closeCode?: number): void {
    logForDebugging(
      `WebSocketTransport: Disconnected from ${this.url.href}` +
        (closeCode != null ? ` (code ${closeCode})` : ''),
    )
    logForDiagnosticsNoPII('info', 'cli_websocket_disconnected')
    if (this.isBridge) {
      // Fire on every close — including intermediate ones during a reconnect
      // storm (those never surface to the onCloseCallback consumer). For the
      // Cloudflare-5min-idle hypothesis: cluster msSinceLastActivity; if the
      // peak sits at ~300s with closeCode 1006, that's the proxy RST.
      logEvent('tengu_ws_transport_closed', {
        closeCode,
        msSinceLastActivity:
          this.lastActivityTime > 0 ? Date.now() - this.lastActivityTime : -1,
        // 'connected' = healthy drop (the Cloudflare case); 'reconnecting' =
        // connect-rejection mid-storm. State isn't mutated until the branches
        // below, so this reads the pre-close value.
        wasConnected: this.state === 'connected',
        reconnectAttempts: this.reconnectAttempts,
      })
    }
    this.doDisconnect()

    if (this.state === 'closing' || this.state === 'closed') return

    // Permanent codes: don't retry — server has definitively ended the session.
    // Exception: 4003 (unauthorized) can be retried when refreshHeaders is
    // available and returns a new token (e.g. after the parent process mints
    // a fresh session ingress token during reconnection).
    let headersRefreshed = false
    if (closeCode === 4003 && this.refreshHeaders) {
      const freshHeaders = this.refreshHeaders()
      if (freshHeaders.Authorization !== this.headers.Authorization) {
        Object.assign(this.headers, freshHeaders)
        headersRefreshed = true
        logForDebugging(
          'WebSocketTransport: 4003 received but headers refreshed, scheduling reconnect',
        )
        logForDiagnosticsNoPII('info', 'cli_websocket_4003_token_refreshed')
      }
    }

    if (
      closeCode != null &&
      PERMANENT_CLOSE_CODES.has(closeCode) &&
      !headersRefreshed
    ) {
      logForDebugging(
        `WebSocketTransport: Permanent close code ${closeCode}, not reconnecting`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_permanent_close', {
        closeCode,
      })
      this.state = 'closed'
      this.onCloseCallback?.(closeCode)
      return
    }

    // When autoReconnect is disabled, go straight to closed state.
    // The caller (e.g. REPL bridge poll loop) handles recovery.
    if (!this.autoReconnect) {
      this.state = 'closed'
      this.onCloseCallback?.(closeCode)
      return
    }

    // Schedule reconnection with exponential backoff and time budget
    const now = Date.now()
    if (!this.reconnectStartTime) {
      this.reconnectStartTime = now
    }

    // Detect system sleep/wake: if the gap since our last reconnection
    // attempt greatly exceeds the max delay, the machine likely slept
    // (e.g. laptop lid closed). Reset the budget and retry from scratch —
    // the server will reject with permanent close codes (4001/1002) if
    // the session was reaped while we were asleep.
    if (
      this.lastReconnectAttemptTime !== null &&
      now - this.lastReconnectAttemptTime > SLEEP_DETECTION_THRESHOLD_MS
    ) {
      logForDebugging(
        `WebSocketTransport: Detected system sleep (${Math.round((now - this.lastReconnectAttemptTime) / 1000)}s gap), resetting reconnection budget`,
      )
      logForDiagnosticsNoPII('info', 'cli_websocket_sleep_detected', {
        gapMs: now - this.lastReconnectAttemptTime,
      })
      this.reconnectStartTime = now
      this.reconnectAttempts = 0
    }
    this.lastReconnectAttemptTime = now

    const elapsed = now - this.reconnectStartTime
    if (elapsed < DEFAULT_RECONNECT_GIVE_UP_MS) {
      // Clear any existing reconnection timer to avoid duplicates
      if (this.reconnectTimer) {
        clearTimeout(this.reconnectTimer)
        this.reconnectTimer = null
      }

      // Refresh headers before reconnecting (e.g. to pick up a new session token).
      // Skip if already refreshed by the 4003 path above.
      if (!headersRefreshed && this.refreshHeaders) {
        const freshHeaders = this.refreshHeaders()
        Object.assign(this.headers, freshHeaders)
        logForDebugging('WebSocketTransport: Refreshed headers for reconnect')
      }

      this.state = 'reconnecting'
      this.reconnectAttempts++

      const baseDelay = Math.min(
        DEFAULT_BASE_RECONNECT_DELAY * Math.pow(2, this.reconnectAttempts - 1),
        DEFAULT_MAX_RECONNECT_DELAY,
      )
      // Add ±25% jitter to avoid thundering herd
      const delay = Math.max(
        0,
        baseDelay + baseDelay * 0.25 * (2 * Math.random() - 1),
      )

      logForDebugging(
        `WebSocketTransport: Reconnecting in ${Math.round(delay)}ms (attempt ${this.reconnectAttempts}, ${Math.round(elapsed / 1000)}s elapsed)`,
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_attempt', {
        reconnectAttempts: this.reconnectAttempts,
      })
      if (this.isBridge) {
        logEvent('tengu_ws_transport_reconnecting', {
          attempt: this.reconnectAttempts,
          elapsedMs: elapsed,
          delayMs: Math.round(delay),
        })
      }

      this.reconnectTimer = setTimeout(() => {
        this.reconnectTimer = null
        void this.connect()
      }, delay)
    } else {
      logForDebugging(
        `WebSocketTransport: Reconnection time budget exhausted after ${Math.round(elapsed / 1000)}s for ${this.url.href}`,
        { level: 'error' },
      )
      logForDiagnosticsNoPII('error', 'cli_websocket_reconnect_exhausted', {
        reconnectAttempts: this.reconnectAttempts,
        elapsedMs: elapsed,
      })
      this.state = 'closed'

      // Notify close callback
      if (this.onCloseCallback) {
        this.onCloseCallback(closeCode)
      }
    }
  }
+
+  close(): void {
+    // Clear any pending reconnection timer
+    if (this.reconnectTimer) {
+      clearTimeout(this.reconnectTimer)
+      this.reconnectTimer = null
+    }
+
+    // Clear ping and keepalive intervals
+    this.stopPingInterval()
+    this.stopKeepaliveInterval()
+
+    // Unregister session activity callback
+    unregisterSessionActivityCallback()
+
+    this.state = 'closing'
+    this.doDisconnect()
+  }
+
  /**
   * After a reconnect, re-send buffered messages the server may have missed.
   * `lastId` is the last message UUID the server confirmed (from the upgrade
   * response, when available): confirmed entries are evicted, the remainder
   * replayed in order. An empty lastId replays everything — the server
   * deduplicates by UUID.
   */
  private replayBufferedMessages(lastId: string): void {
    const messages = this.messageBuffer.toArray()
    if (messages.length === 0) return

    // Find where to start replay based on server's last received message
    let startIndex = 0
    if (lastId) {
      const lastConfirmedIndex = messages.findIndex(
        message => 'uuid' in message && message.uuid === lastId,
      )
      if (lastConfirmedIndex >= 0) {
        // Server confirmed messages up to lastConfirmedIndex — evict them
        startIndex = lastConfirmedIndex + 1
        // Rebuild the buffer with only unconfirmed messages
        const remaining = messages.slice(startIndex)
        this.messageBuffer.clear()
        this.messageBuffer.addAll(remaining)
        if (remaining.length === 0) {
          this.lastSentId = null
        }
        logForDebugging(
          `WebSocketTransport: Evicted ${startIndex} confirmed messages, ${remaining.length} remaining`,
        )
        logForDiagnosticsNoPII(
          'info',
          'cli_websocket_evicted_confirmed_messages',
          {
            evicted: startIndex,
            remaining: remaining.length,
          },
        )
      }
    }

    const messagesToReplay = messages.slice(startIndex)
    if (messagesToReplay.length === 0) {
      logForDebugging('WebSocketTransport: No new messages to replay')
      logForDiagnosticsNoPII('info', 'cli_websocket_no_messages_to_replay')
      return
    }

    logForDebugging(
      `WebSocketTransport: Replaying ${messagesToReplay.length} buffered messages`,
    )
    logForDiagnosticsNoPII('info', 'cli_websocket_messages_to_replay', {
      count: messagesToReplay.length,
    })

    for (const message of messagesToReplay) {
      const line = jsonStringify(message) + '\n'
      const success = this.sendLine(line)
      if (!success) {
        // NOTE(review): sendLine already invokes handleConnectionError when
        // send() throws, so this path can call it a second time; the timer
        // dedup in handleConnectionError appears to make that benign — verify.
        this.handleConnectionError()
        break
      }
    }
    // Do NOT clear the buffer after replay — messages remain buffered until
    // the server confirms receipt on the next reconnection. This prevents
    // message loss if the connection drops after replay but before the server
    // processes the messages.
  }
+
  /** True only while the socket is connected and usable for sends. */
  isConnectedStatus(): boolean {
    return this.state === 'connected'
  }
+
  /** True once the transport reached the terminal 'closed' state (e.g. after reconnects are exhausted). */
  isClosedStatus(): boolean {
    return this.state === 'closed'
  }
+
  /** Store the inbound-data callback. */
  setOnData(callback: (data: string) => void): void {
    this.onData = callback
  }
+
  /** Store the connect callback. */
  setOnConnect(callback: () => void): void {
    this.onConnectCallback = callback
  }
+
  /** Store the close callback; it receives the close code when one is available (see the reconnect-exhausted path). */
  setOnClose(callback: (closeCode?: number) => void): void {
    this.onCloseCallback = callback
  }
+
  /** Current connection-state label ('connected', 'closing', 'closed', …) — handy for logging. */
  getStateLabel(): string {
    return this.state
  }
+
  /**
   * Send a message to the server. Any message carrying a string `uuid` is
   * added to the replay buffer first (and becomes the new lastSentId) so it
   * can be re-sent after a reconnect. When not connected, the method returns
   * right after buffering — messages without a uuid written while
   * disconnected are dropped.
   */
  async write(message: StdoutMessage): Promise<void> {
    if ('uuid' in message && typeof message.uuid === 'string') {
      this.messageBuffer.add(message)
      this.lastSentId = message.uuid
    }

    const line = jsonStringify(message) + '\n'

    if (this.state !== 'connected') {
      // Message buffered for replay when connected (if it has a UUID)
      return
    }

    const sessionLabel = this.sessionId ? ` session=${this.sessionId}` : ''
    const detailLabel = this.getControlMessageDetailLabel(message)

    logForDebugging(
      `WebSocketTransport: Sending message type=${message.type}${sessionLabel}${detailLabel}`,
    )

    // NOTE(review): sendLine's result is ignored here, unlike in
    // replayBufferedMessages where `false` triggers handleConnectionError —
    // confirm sendLine reports transport failures on its own.
    this.sendLine(line)
  }
+
+  private getControlMessageDetailLabel(message: StdoutMessage): string {
+    if (message.type === 'control_request') {
+      const { request_id, request } = message
+      const toolName =
+        request.subtype === 'can_use_tool' ? request.tool_name : ''
+      return ` subtype=${request.subtype} request_id=${request_id}${toolName ? ` tool=${toolName}` : ''}`
+    }
+    if (message.type === 'control_response') {
+      const { subtype, request_id } = message.response
+      return ` subtype=${subtype} request_id=${request_id}`
+    }
+    return ''
+  }
+
  /**
   * Start the liveness loop. Each tick: (1) detect process suspension via an
   * oversized wall-clock gap between ticks and force a reconnect; (2) if the
   * previous ping never got a pong, treat the connection as dead; (3) send
   * the next ping.
   */
  private startPingInterval(): void {
    // Clear any existing interval
    this.stopPingInterval()

    this.pongReceived = true
    let lastTickTime = Date.now()

    // Send ping periodically to detect dead connections.
    // If the previous ping got no pong, treat the connection as dead.
    this.pingInterval = setInterval(() => {
      if (this.state === 'connected' && this.ws) {
        const now = Date.now()
        const gap = now - lastTickTime
        lastTickTime = now

        // Process-suspension detector. If the wall-clock gap between ticks
        // greatly exceeds the 10s interval, the process was suspended
        // (laptop lid, SIGSTOP, VM pause). setInterval does not queue
        // missed ticks — it coalesces — so on wake this callback fires
        // once with a huge gap. The socket is almost certainly dead:
        // NAT mappings drop in 30s–5min, and the server has been
        // retransmitting into the void. Don't wait for a ping/pong
        // round-trip to confirm (ws.ping() on a dead socket returns
        // immediately with no error — bytes go into the kernel send
        // buffer). Assume dead and reconnect now. A spurious reconnect
        // after a short sleep is cheap — replayBufferedMessages() handles
        // it and the server dedups by UUID.
        if (gap > SLEEP_DETECTION_THRESHOLD_MS) {
          logForDebugging(
            `WebSocketTransport: ${Math.round(gap / 1000)}s tick gap detected — process was suspended, forcing reconnect`,
          )
          logForDiagnosticsNoPII(
            'info',
            'cli_websocket_sleep_detected_on_ping',
            { gapMs: gap },
          )
          this.handleConnectionError()
          return
        }

        if (!this.pongReceived) {
          logForDebugging(
            'WebSocketTransport: No pong received, connection appears dead',
            { level: 'error' },
          )
          logForDiagnosticsNoPII('error', 'cli_websocket_pong_timeout')
          this.handleConnectionError()
          return
        }

        // Arm the pong flag for this round-trip; the pong handler (outside
        // this view) is expected to set it back to true.
        this.pongReceived = false
        try {
          this.ws.ping?.()
        } catch (error) {
          logForDebugging(`WebSocketTransport: Ping failed: ${error}`, {
            level: 'error',
          })
          logForDiagnosticsNoPII('error', 'cli_websocket_ping_failed')
        }
      }
    }, DEFAULT_PING_INTERVAL)
  }
+
+  private stopPingInterval(): void {
+    if (this.pingInterval) {
+      clearInterval(this.pingInterval)
+      this.pingInterval = null
+    }
+  }
+
+  private startKeepaliveInterval(): void {
+    this.stopKeepaliveInterval()
+
+    // In CCR sessions, session activity heartbeats handle keep-alives
+    if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) {
+      return
+    }
+
+    this.keepAliveInterval = setInterval(() => {
+      if (this.state === 'connected' && this.ws) {
+        try {
+          this.ws.send(KEEP_ALIVE_FRAME)
+          this.lastActivityTime = Date.now()
+          logForDebugging(
+            'WebSocketTransport: Sent periodic keep_alive data frame',
+          )
+        } catch (error) {
+          logForDebugging(
+            `WebSocketTransport: Periodic keep_alive failed: ${error}`,
+            { level: 'error' },
+          )
+          logForDiagnosticsNoPII('error', 'cli_websocket_keepalive_failed')
+        }
+      }
+    }, DEFAULT_KEEPALIVE_INTERVAL)
+  }
+
+  private stopKeepaliveInterval(): void {
+    if (this.keepAliveInterval) {
+      clearInterval(this.keepAliveInterval)
+      this.keepAliveInterval = null
+    }
+  }
+}

+ 131 - 0
src/cli/transports/WorkerStateUploader.ts

@@ -0,0 +1,131 @@
+import { sleep } from '../../utils/sleep.js'
+
+/**
+ * Coalescing uploader for PUT /worker (session state + metadata).
+ *
+ * - 1 in-flight PUT + 1 pending patch
+ * - New calls coalesce into pending (never grows beyond 1 slot)
+ * - On success: send pending if exists
+ * - On failure: exponential backoff (clamped), retries indefinitely
+ *   until success or close(). Absorbs any pending patches before each retry.
+ * - No backpressure needed — naturally bounded at 2 slots
+ *
+ * Coalescing rules:
+ * - Top-level keys (worker_status, external_metadata) — last value wins
+ * - Inside external_metadata / internal_metadata — RFC 7396 merge:
+ *   keys are added/overwritten, null values preserved (server deletes)
+ */
+
/** Dependencies and backoff tuning injected into WorkerStateUploader. */
type WorkerStateUploaderConfig = {
  /** Performs the PUT; resolves true on success, false on failure. */
  send: (body: Record<string, unknown>) => Promise<boolean>
  /** Base delay for exponential backoff (ms) */
  baseDelayMs: number
  /** Max delay cap (ms) */
  maxDelayMs: number
  /** Random jitter range added to retry delay (ms) */
  jitterMs: number
}
+
+export class WorkerStateUploader {
+  private inflight: Promise<void> | null = null
+  private pending: Record<string, unknown> | null = null
+  private closed = false
+  private readonly config: WorkerStateUploaderConfig
+
+  constructor(config: WorkerStateUploaderConfig) {
+    this.config = config
+  }
+
+  /**
+   * Enqueue a patch to PUT /worker. Coalesces with any existing pending
+   * patch. Fire-and-forget — callers don't need to await.
+   */
+  enqueue(patch: Record<string, unknown>): void {
+    if (this.closed) return
+    this.pending = this.pending ? coalescePatches(this.pending, patch) : patch
+    void this.drain()
+  }
+
+  close(): void {
+    this.closed = true
+    this.pending = null
+  }
+
+  private async drain(): Promise<void> {
+    if (this.inflight || this.closed) return
+    if (!this.pending) return
+
+    const payload = this.pending
+    this.pending = null
+
+    this.inflight = this.sendWithRetry(payload).then(() => {
+      this.inflight = null
+      if (this.pending && !this.closed) {
+        void this.drain()
+      }
+    })
+  }
+
+  /** Retries indefinitely with exponential backoff until success or close(). */
+  private async sendWithRetry(payload: Record<string, unknown>): Promise<void> {
+    let current = payload
+    let failures = 0
+    while (!this.closed) {
+      const ok = await this.config.send(current)
+      if (ok) return
+
+      failures++
+      await sleep(this.retryDelay(failures))
+
+      // Absorb any patches that arrived during the retry
+      if (this.pending && !this.closed) {
+        current = coalescePatches(current, this.pending)
+        this.pending = null
+      }
+    }
+  }
+
+  private retryDelay(failures: number): number {
+    const exponential = Math.min(
+      this.config.baseDelayMs * 2 ** (failures - 1),
+      this.config.maxDelayMs,
+    )
+    const jitter = Math.random() * this.config.jitterMs
+    return exponential + jitter
+  }
+}
+
+/**
+ * Coalesce two patches for PUT /worker.
+ *
+ * Top-level keys: overlay replaces base (last value wins).
+ * Metadata keys (external_metadata, internal_metadata): RFC 7396 merge
+ * one level deep — overlay keys are added/overwritten, null values
+ * preserved for server-side delete.
+ */
+function coalescePatches(
+  base: Record<string, unknown>,
+  overlay: Record<string, unknown>,
+): Record<string, unknown> {
+  const merged = { ...base }
+
+  for (const [key, value] of Object.entries(overlay)) {
+    if (
+      (key === 'external_metadata' || key === 'internal_metadata') &&
+      merged[key] &&
+      typeof merged[key] === 'object' &&
+      typeof value === 'object' &&
+      value !== null
+    ) {
+      // RFC 7396 merge — overlay keys win, nulls preserved for server
+      merged[key] = {
+        ...(merged[key] as Record<string, unknown>),
+        ...(value as Record<string, unknown>),
+      }
+    } else {
+      merged[key] = value
+    }
+  }
+
+  return merged
+}

+ 998 - 0
src/cli/transports/ccrClient.ts

@@ -0,0 +1,998 @@
+import { randomUUID } from 'crypto'
+import type {
+  SDKPartialAssistantMessage,
+  StdoutMessage,
+} from 'src/entrypoints/sdk/controlTypes.js'
+import { decodeJwtExpiry } from '../../bridge/jwtUtils.js'
+import { logForDebugging } from '../../utils/debug.js'
+import { logForDiagnosticsNoPII } from '../../utils/diagLogs.js'
+import { errorMessage, getErrnoCode } from '../../utils/errors.js'
+import { createAxiosInstance } from '../../utils/proxy.js'
+import {
+  registerSessionActivityCallback,
+  unregisterSessionActivityCallback,
+} from '../../utils/sessionActivity.js'
+import {
+  getSessionIngressAuthHeaders,
+  getSessionIngressAuthToken,
+} from '../../utils/sessionIngressAuth.js'
+import type {
+  RequiresActionDetails,
+  SessionState,
+} from '../../utils/sessionState.js'
+import { sleep } from '../../utils/sleep.js'
+import { getClaudeCodeUserAgent } from '../../utils/userAgent.js'
+import {
+  RetryableError,
+  SerialBatchEventUploader,
+} from './SerialBatchEventUploader.js'
+import type { SSETransport, StreamClientEvent } from './SSETransport.js'
+import { WorkerStateUploader } from './WorkerStateUploader.js'
+
/**
 * Default interval between heartbeat events (20s; server TTL is 60s, so
 * several beats fit inside one TTL window before the worker expires).
 */
const DEFAULT_HEARTBEAT_INTERVAL_MS = 20_000

/**
 * stream_event messages accumulate in a delay buffer for up to this many ms
 * before enqueue. Mirrors HybridTransport's batching window. text_delta
 * events for the same content block accumulate into a single full-so-far
 * snapshot per flush — each emitted event is self-contained so a client
 * connecting mid-stream sees complete text, not a fragment.
 */
const STREAM_EVENT_FLUSH_INTERVAL_MS = 100
+
/**
 * Hoisted axios validateStatus callback to avoid per-request closure
 * allocation. Accepting every status lets request() branch on
 * response.status itself instead of catching axios status errors.
 */
function alwaysValidStatus(): boolean {
  return true
}
+
/** Why initialize() gave up — surfaced to callers via CCRInitError.reason. */
export type CCRInitFailReason =
  | 'no_auth_headers'
  | 'missing_epoch'
  | 'worker_register_failed'
+/** Thrown by initialize(); carries a typed reason for the diag classifier. */
+export class CCRInitError extends Error {
+  constructor(readonly reason: CCRInitFailReason) {
+    super(`CCRClient init failed: ${reason}`)
+  }
+}
+
/**
 * Consecutive 401/403 with a VALID-LOOKING token before giving up. An
 * expired JWT short-circuits this (exits immediately — deterministic,
 * retry is futile). This threshold is for the uncertain case: token's
 * exp is in the future but server says 401 (userauth down, KMS hiccup,
 * clock skew). 10 × 20s heartbeat ≈ 200s to ride it out.
 */
const MAX_CONSECUTIVE_AUTH_FAILURES = 10

/** Minimal event envelope: a unique id plus a type tag; extra fields ride along. */
type EventPayload = {
  uuid: string
  type: string
  [key: string]: unknown
}

/** Client-originated event queued for POST /worker/events. */
type ClientEvent = {
  payload: EventPayload
  // presumably marks events the server need not persist — TODO confirm
  ephemeral?: boolean
}
+
/**
 * Structural subset of a stream_event carrying a text_delta. Not a narrowing
 * of SDKPartialAssistantMessage — RawMessageStreamEvent's delta is a union and
 * narrowing through two levels defeats the discriminant.
 */
type CoalescedStreamEvent = {
  type: 'stream_event'
  // Reuses the first text_delta UUID seen for the block within a flush, so
  // server-side idempotency stays stable across retries.
  uuid: string
  session_id: string
  parent_tool_use_id: string | null
  event: {
    type: 'content_block_delta'
    index: number
    delta: { type: 'text_delta'; text: string }
  }
}

/**
 * Accumulator state for text_delta coalescing. Keyed by API message ID so
 * lifetime is tied to the assistant message — cleared when the complete
 * SDKAssistantMessage arrives (writeEvent), which is reliable even when
 * abort/error paths skip content_block_stop/message_stop delivery.
 */
export type StreamAccumulatorState = {
  /** API message ID (msg_...) → blocks[blockIndex] → chunk array. */
  byMessage: Map<string, string[][]>
  /**
   * {session_id}:{parent_tool_use_id} → active message ID.
   * content_block_delta events don't carry the message ID (only
   * message_start does), so we track which message is currently streaming
   * for each scope. At most one message streams per scope at a time.
   */
  scopeToMessage: Map<string, string>
}
+
+export function createStreamAccumulator(): StreamAccumulatorState {
+  return { byMessage: new Map(), scopeToMessage: new Map() }
+}
+
+function scopeKey(m: {
+  session_id: string
+  parent_tool_use_id: string | null
+}): string {
+  return `${m.session_id}:${m.parent_tool_use_id ?? ''}`
+}
+
/**
 * Accumulate text_delta stream_events into full-so-far snapshots per content
 * block. Each flush emits ONE event per touched block containing the FULL
 * accumulated text from the start of the block — a client connecting
 * mid-stream receives a self-contained snapshot, not a fragment.
 *
 * Non-text-delta events pass through unchanged. message_start records the
 * active message ID for the scope; content_block_delta appends chunks;
 * the snapshot event reuses the first text_delta UUID seen for that block in
 * this flush so server-side idempotency remains stable across retries.
 *
 * Cleanup happens in writeEvent when the complete assistant message arrives
 * (reliable), not here on stop events (abort/error paths skip those).
 */
export function accumulateStreamEvents(
  buffer: SDKPartialAssistantMessage[],
  state: StreamAccumulatorState,
): EventPayload[] {
  const out: EventPayload[] = []
  // chunks[] → snapshot already in `out` this flush. Keyed by the chunks
  // array reference (stable per {messageId, index}) so subsequent deltas
  // rewrite the same entry instead of emitting one event per delta.
  const touched = new Map<string[], CoalescedStreamEvent>()
  for (const msg of buffer) {
    switch (msg.event.type) {
      case 'message_start': {
        const id = msg.event.message.id
        // A new message supersedes any message still streaming in this
        // scope — drop the superseded message's chunks.
        const prevId = state.scopeToMessage.get(scopeKey(msg))
        if (prevId) state.byMessage.delete(prevId)
        state.scopeToMessage.set(scopeKey(msg), id)
        state.byMessage.set(id, [])
        out.push(msg)
        break
      }
      case 'content_block_delta': {
        if (msg.event.delta.type !== 'text_delta') {
          out.push(msg)
          break
        }
        const messageId = state.scopeToMessage.get(scopeKey(msg))
        const blocks = messageId ? state.byMessage.get(messageId) : undefined
        if (!blocks) {
          // Delta without a preceding message_start (reconnect mid-stream,
          // or message_start was in a prior buffer that got dropped). Pass
          // through raw — can't produce a full-so-far snapshot without the
          // prior chunks anyway.
          out.push(msg)
          break
        }
        // First delta for this block index lazily creates its chunk list.
        const chunks = (blocks[msg.event.index] ??= [])
        chunks.push(msg.event.delta.text)
        const existing = touched.get(chunks)
        if (existing) {
          existing.event.delta.text = chunks.join('')
          break
        }
        const snapshot: CoalescedStreamEvent = {
          type: 'stream_event',
          uuid: msg.uuid,
          session_id: msg.session_id,
          parent_tool_use_id: msg.parent_tool_use_id,
          event: {
            type: 'content_block_delta',
            index: msg.event.index,
            delta: { type: 'text_delta', text: chunks.join('') },
          },
        }
        touched.set(chunks, snapshot)
        out.push(snapshot)
        break
      }
      default:
        out.push(msg)
    }
  }
  return out
}
+
+/**
+ * Clear accumulator entries for a completed assistant message. Called from
+ * writeEvent when the SDKAssistantMessage arrives — the reliable end-of-stream
+ * signal that fires even when abort/interrupt/error skip SSE stop events.
+ */
+export function clearStreamAccumulatorForMessage(
+  state: StreamAccumulatorState,
+  assistant: {
+    session_id: string
+    parent_tool_use_id: string | null
+    message: { id: string }
+  },
+): void {
+  state.byMessage.delete(assistant.message.id)
+  const scope = scopeKey(assistant)
+  if (state.scopeToMessage.get(scope) === assistant.message.id) {
+    state.scopeToMessage.delete(scope)
+  }
+}
+
/** Outcome of request(); retryAfterMs carries the server's 429 Retry-After hint (ms). */
type RequestResult = { ok: true } | { ok: false; retryAfterMs?: number }

/** Worker-originated event queued for POST /worker/internal-events. */
type WorkerEvent = {
  payload: EventPayload
  is_compaction?: boolean
  agent_id?: string
}

/** Internal event record (see ListInternalEventsResponse). */
export type InternalEvent = {
  event_id: string
  event_type: string
  payload: Record<string, unknown>
  event_metadata?: Record<string, unknown> | null
  is_compaction: boolean
  created_at: string
  agent_id?: string
}

/** Cursor-paginated list of internal events. */
type ListInternalEventsResponse = {
  data: InternalEvent[]
  next_cursor?: string
}

/** GET /worker response — the prior worker's persisted external metadata. */
type WorkerStateResponse = {
  worker?: {
    external_metadata?: Record<string, unknown>
  }
}
+
+/**
+ * Manages the worker lifecycle protocol with CCR v2:
+ * - Epoch management: reads worker_epoch from CLAUDE_CODE_WORKER_EPOCH env var
+ * - Runtime state reporting: PUT /sessions/{id}/worker
+ * - Heartbeat: POST /sessions/{id}/worker/heartbeat for liveness detection
+ *
+ * All writes go through this.request().
+ */
+export class CCRClient {
  // Worker identity & liveness bookkeeping. workerEpoch is assigned in
  // initialize(); consecutiveAuthFailures is reset on any 2xx in request().
  private workerEpoch = 0
  private readonly heartbeatIntervalMs: number
  private readonly heartbeatJitterFraction: number
  private heartbeatTimer: NodeJS.Timeout | null = null
  private heartbeatInFlight = false
  private closed = false
  private consecutiveAuthFailures = 0
  private currentState: SessionState | null = null
  private readonly sessionBaseUrl: string
  private readonly sessionId: string
  private readonly http = createAxiosInstance({ keepAlive: true })

  // stream_event delay buffer — accumulates content deltas for up to
  // STREAM_EVENT_FLUSH_INTERVAL_MS before enqueueing (reduces POST count
  // and enables text_delta coalescing). Mirrors HybridTransport's pattern.
  private streamEventBuffer: SDKPartialAssistantMessage[] = []
  private streamEventTimer: ReturnType<typeof setTimeout> | null = null
  // Full-so-far text accumulator. Persists across flushes so each emitted
  // text_delta event carries the complete text from the start of the block —
  // mid-stream reconnects see a self-contained snapshot. Keyed by API message
  // ID; cleared in writeEvent when the complete assistant message arrives.
  private streamTextAccumulator = createStreamAccumulator()

  // Four coalescing/batching uploaders, all wired in the constructor.
  private readonly workerState: WorkerStateUploader
  private readonly eventUploader: SerialBatchEventUploader<ClientEvent>
  private readonly internalEventUploader: SerialBatchEventUploader<WorkerEvent>
  private readonly deliveryUploader: SerialBatchEventUploader<{
    eventId: string
    status: 'received' | 'processing' | 'processed'
  }>

  /**
   * Called when the server returns 409 (a newer worker epoch superseded ours).
   * Default: process.exit(1) — correct for spawn-mode children where the
   * parent bridge re-spawns. In-process callers (replBridge) MUST override
   * this to close gracefully instead; exit would kill the user's REPL.
   */
  private readonly onEpochMismatch: () => never

  /**
   * Auth header source. Defaults to the process-wide session-ingress token
   * (CLAUDE_CODE_SESSION_ACCESS_TOKEN env var). Callers managing multiple
   * concurrent sessions with distinct JWTs MUST inject this — the env-var
   * path is a process global and would stomp across sessions.
   */
  private readonly getAuthHeaders: () => Record<string, string>
+
  /**
   * Wire up the four uploaders and derive sessionBaseUrl / sessionId from
   * the session URL. Throws for non-http(s) schemes. The transport's
   * delivery-ack callback is registered here rather than in initialize() so
   * the first SSE catch-up frame cannot race an unwired callback (see the
   * comment at the bottom).
   */
  constructor(
    transport: SSETransport,
    sessionUrl: URL,
    opts?: {
      onEpochMismatch?: () => never
      heartbeatIntervalMs?: number
      heartbeatJitterFraction?: number
      /**
       * Per-instance auth header source. Omit to read the process-wide
       * CLAUDE_CODE_SESSION_ACCESS_TOKEN (single-session callers — REPL,
       * daemon). Required for concurrent multi-session callers.
       */
      getAuthHeaders?: () => Record<string, string>
    },
  ) {
    this.onEpochMismatch =
      opts?.onEpochMismatch ??
      (() => {
        // eslint-disable-next-line custom-rules/no-process-exit
        process.exit(1)
      })
    this.heartbeatIntervalMs =
      opts?.heartbeatIntervalMs ?? DEFAULT_HEARTBEAT_INTERVAL_MS
    this.heartbeatJitterFraction = opts?.heartbeatJitterFraction ?? 0
    this.getAuthHeaders = opts?.getAuthHeaders ?? getSessionIngressAuthHeaders
    // Session URL: https://host/v1/code/sessions/{id}
    if (sessionUrl.protocol !== 'http:' && sessionUrl.protocol !== 'https:') {
      throw new Error(
        `CCRClient: Expected http(s) URL, got ${sessionUrl.protocol}`,
      )
    }
    const pathname = sessionUrl.pathname.replace(/\/$/, '')
    this.sessionBaseUrl = `${sessionUrl.protocol}//${sessionUrl.host}${pathname}`
    // Extract session ID from the URL path (last segment)
    this.sessionId = pathname.split('/').pop() || ''

    this.workerState = new WorkerStateUploader({
      send: body =>
        this.request(
          'put',
          '/worker',
          { worker_epoch: this.workerEpoch, ...body },
          'PUT worker',
        ).then(r => r.ok),
      baseDelayMs: 500,
      maxDelayMs: 30_000,
      jitterMs: 500,
    })

    this.eventUploader = new SerialBatchEventUploader<ClientEvent>({
      maxBatchSize: 100,
      maxBatchBytes: 10 * 1024 * 1024,
      // flushStreamEventBuffer() enqueues a full 100ms window of accumulated
      // stream_events in one call. A burst of mixed delta types that don't
      // fold into a single snapshot could exceed the old cap (50) and deadlock
      // on the SerialBatchEventUploader backpressure check. Match
      // HybridTransport's bound — high enough to be memory-only.
      maxQueueSize: 100_000,
      send: async batch => {
        const result = await this.request(
          'post',
          '/worker/events',
          { worker_epoch: this.workerEpoch, events: batch },
          'client events',
        )
        if (!result.ok) {
          throw new RetryableError(
            'client event POST failed',
            result.retryAfterMs,
          )
        }
      },
      baseDelayMs: 500,
      maxDelayMs: 30_000,
      jitterMs: 500,
    })

    this.internalEventUploader = new SerialBatchEventUploader<WorkerEvent>({
      maxBatchSize: 100,
      maxBatchBytes: 10 * 1024 * 1024,
      maxQueueSize: 200,
      send: async batch => {
        const result = await this.request(
          'post',
          '/worker/internal-events',
          { worker_epoch: this.workerEpoch, events: batch },
          'internal events',
        )
        if (!result.ok) {
          throw new RetryableError(
            'internal event POST failed',
            result.retryAfterMs,
          )
        }
      },
      baseDelayMs: 500,
      maxDelayMs: 30_000,
      jitterMs: 500,
    })

    this.deliveryUploader = new SerialBatchEventUploader<{
      eventId: string
      status: 'received' | 'processing' | 'processed'
    }>({
      maxBatchSize: 64,
      maxQueueSize: 64,
      send: async batch => {
        const result = await this.request(
          'post',
          '/worker/events/delivery',
          {
            worker_epoch: this.workerEpoch,
            updates: batch.map(d => ({
              event_id: d.eventId,
              status: d.status,
            })),
          },
          'delivery batch',
        )
        if (!result.ok) {
          throw new RetryableError('delivery POST failed', result.retryAfterMs)
        }
      },
      baseDelayMs: 500,
      maxDelayMs: 30_000,
      jitterMs: 500,
    })

    // Ack each received client_event so CCR can track delivery status.
    // Wired here (not in initialize()) so the callback is registered the
    // moment new CCRClient() returns — remoteIO must be free to call
    // transport.connect() immediately after without racing the first
    // SSE catch-up frame against an unwired onEventCallback.
    transport.setOnEvent((event: StreamClientEvent) => {
      this.reportDelivery(event.event_id, 'received')
    })
  }
+
+  /**
+   * Initialize the session worker:
+   * 1. Take worker_epoch from the argument, or fall back to
+   *    CLAUDE_CODE_WORKER_EPOCH (set by env-manager / bridge spawner)
+   * 2. Report state as 'idle'
+   * 3. Start heartbeat timer
+   *
+   * In-process callers (replBridge) pass the epoch directly — they
+   * registered the worker themselves and there is no parent process
+   * setting env vars.
+   */
+  async initialize(epoch?: number): Promise<Record<string, unknown> | null> {
+    const startMs = Date.now()
+    if (Object.keys(this.getAuthHeaders()).length === 0) {
+      throw new CCRInitError('no_auth_headers')
+    }
+    if (epoch === undefined) {
+      const rawEpoch = process.env.CLAUDE_CODE_WORKER_EPOCH
+      epoch = rawEpoch ? parseInt(rawEpoch, 10) : NaN
+    }
+    if (isNaN(epoch)) {
+      throw new CCRInitError('missing_epoch')
+    }
+    this.workerEpoch = epoch
+
+    // Concurrent with the init PUT — neither depends on the other.
+    const restoredPromise = this.getWorkerState()
+
+    const result = await this.request(
+      'put',
+      '/worker',
+      {
+        worker_status: 'idle',
+        worker_epoch: this.workerEpoch,
+        // Clear stale pending_action/task_summary left by a prior
+        // worker crash — the in-session clears don't survive process restart.
+        external_metadata: {
+          pending_action: null,
+          task_summary: null,
+        },
+      },
+      'PUT worker (init)',
+    )
+    if (!result.ok) {
+      // 409 → onEpochMismatch may throw, but request() catches it and returns
+      // false. Without this check we'd continue to startHeartbeat(), leaking a
+      // 20s timer against a dead epoch. Throw so connect()'s rejection handler
+      // fires instead of the success path.
+      throw new CCRInitError('worker_register_failed')
+    }
+    this.currentState = 'idle'
+    this.startHeartbeat()
+
+    // sessionActivity's refcount-gated timer fires while an API call or tool
+    // is in-flight; without a write the container lease can expire mid-wait.
+    // v1 wires this in WebSocketTransport per-connection.
+    registerSessionActivityCallback(() => {
+      void this.writeEvent({ type: 'keep_alive' })
+    })
+
+    logForDebugging(`CCRClient: initialized, epoch=${this.workerEpoch}`)
+    logForDiagnosticsNoPII('info', 'cli_worker_lifecycle_initialized', {
+      epoch: this.workerEpoch,
+      duration_ms: Date.now() - startMs,
+    })
+
+    // Await the concurrent GET and log state_restored here, after the PUT
+    // has succeeded — logging inside getWorkerState() raced: if the GET
+    // resolved before the PUT failed, diagnostics showed both init_failed
+    // and state_restored for the same session.
+    const { metadata, durationMs } = await restoredPromise
+    if (!this.closed) {
+      logForDiagnosticsNoPII('info', 'cli_worker_state_restored', {
+        duration_ms: durationMs,
+        had_state: metadata !== null,
+      })
+    }
+    return metadata
+  }
+
+  // Control_requests are marked processed and not re-delivered on
+  // restart, so read back what the prior worker wrote.
+  private async getWorkerState(): Promise<{
+    metadata: Record<string, unknown> | null
+    durationMs: number
+  }> {
+    const startMs = Date.now()
+    const authHeaders = this.getAuthHeaders()
+    if (Object.keys(authHeaders).length === 0) {
+      return { metadata: null, durationMs: 0 }
+    }
+    const data = await this.getWithRetry<WorkerStateResponse>(
+      `${this.sessionBaseUrl}/worker`,
+      authHeaders,
+      'worker_state',
+    )
+
+  /** Report worker state to CCR via PUT /sessions/{id}/worker. */
+  reportState(state: SessionState, details?: RequiresActionDetails): void {
+    if (state === this.currentState && !details) return
+    this.currentState = state
+    this.workerState.enqueue({
+      worker_status: state,
+      requires_action_details: details
+        ? {
+            tool_name: details.tool_name,
+            action_description: details.action_description,
+            request_id: details.request_id,
+          }
+        : null,
+    })
+  }
+
+  /** Report external metadata to CCR via PUT /worker. */
+  reportMetadata(metadata: Record<string, unknown>): void {
+    this.workerState.enqueue({ external_metadata: metadata })
+  }
+
+  /**
+   * Handle epoch mismatch (409 Conflict). A newer CC instance has replaced
+   * this one — exit immediately.
+   */
+  private handleEpochMismatch(): never {
+    logForDebugging('CCRClient: Epoch mismatch (409), shutting down', {
+      level: 'error',
+    })
+    logForDiagnosticsNoPII('error', 'cli_worker_epoch_mismatch')
+    this.onEpochMismatch()
+  }
+
+  /** Start periodic heartbeat. */
+  private startHeartbeat(): void {
+    this.stopHeartbeat()
+    const schedule = (): void => {
+      const jitter =
+        this.heartbeatIntervalMs *
+        this.heartbeatJitterFraction *
+        (2 * Math.random() - 1)
+      this.heartbeatTimer = setTimeout(tick, this.heartbeatIntervalMs + jitter)
+    }
+    const tick = (): void => {
+      void this.sendHeartbeat()
+      // stopHeartbeat nulls the timer; check after the fire-and-forget send
+      // but before rescheduling so close() during sendHeartbeat is honored.
+      if (this.heartbeatTimer === null) return
+      schedule()
+    }
+    schedule()
+  }
+
+  /** Stop heartbeat timer. */
+  private stopHeartbeat(): void {
+    if (this.heartbeatTimer) {
+      clearTimeout(this.heartbeatTimer)
+      this.heartbeatTimer = null
+    }
+  }
+
+  /** Send a heartbeat via POST /sessions/{id}/worker/heartbeat. */
+  private async sendHeartbeat(): Promise<void> {
+    if (this.heartbeatInFlight) return
+    this.heartbeatInFlight = true
+    try {
+      const result = await this.request(
+        'post',
+        '/worker/heartbeat',
+        { session_id: this.sessionId, worker_epoch: this.workerEpoch },
+        'Heartbeat',
+        { timeout: 5_000 },
+      )
+      if (result.ok) {
+        logForDebugging('CCRClient: Heartbeat sent')
+      }
+    } finally {
+      this.heartbeatInFlight = false
+    }
+  }
+
+  /**
+   * Write a StdoutMessage as a client event via POST /sessions/{id}/worker/events.
+   * These events are visible to frontend clients via the SSE stream.
+   * Injects a UUID if missing to ensure server-side idempotency on retry.
+   *
+   * stream_event messages are held in a 100ms delay buffer and accumulated
+   * (text_deltas for the same content block emit a full-so-far snapshot per
+   * flush). A non-stream_event write flushes the buffer first so downstream
+   * ordering is preserved.
+   */
+  async writeEvent(message: StdoutMessage): Promise<void> {
+    if (message.type === 'stream_event') {
+      this.streamEventBuffer.push(message)
+      if (!this.streamEventTimer) {
+        this.streamEventTimer = setTimeout(
+          () => void this.flushStreamEventBuffer(),
+          STREAM_EVENT_FLUSH_INTERVAL_MS,
+        )
+      }
+      return
+    }
+    await this.flushStreamEventBuffer()
+    if (message.type === 'assistant') {
+      clearStreamAccumulatorForMessage(this.streamTextAccumulator, message)
+    }
+    await this.eventUploader.enqueue(this.toClientEvent(message))
+  }
+
+  /** Wrap a StdoutMessage as a ClientEvent, injecting a UUID if missing. */
+  private toClientEvent(message: StdoutMessage): ClientEvent {
+    const msg = message as unknown as Record<string, unknown>
+    return {
+      payload: {
+        ...msg,
+        uuid: typeof msg.uuid === 'string' ? msg.uuid : randomUUID(),
+      } as EventPayload,
+    }
+  }
+
+  /**
+   * Drain the stream_event delay buffer: accumulate text_deltas into
+   * full-so-far snapshots, clear the timer, enqueue the resulting events.
+   * Called from the timer, from writeEvent on a non-stream message, and from
+   * flush(). close() drops the buffer — call flush() first if you need
+   * delivery.
+   */
+  private async flushStreamEventBuffer(): Promise<void> {
+    if (this.streamEventTimer) {
+      clearTimeout(this.streamEventTimer)
+      this.streamEventTimer = null
+    }
+    if (this.streamEventBuffer.length === 0) return
+    const buffered = this.streamEventBuffer
+    this.streamEventBuffer = []
+    const payloads = accumulateStreamEvents(
+      buffered,
+      this.streamTextAccumulator,
+    )
+    await this.eventUploader.enqueue(
+      payloads.map(payload => ({ payload, ephemeral: true })),
+    )
+  }
+
+  /**
+   * Write an internal worker event via POST /sessions/{id}/worker/internal-events.
+   * These events are NOT visible to frontend clients — they store worker-internal
+   * state (transcript messages, compaction markers) needed for session resume.
+   */
+  async writeInternalEvent(
+    eventType: string,
+    payload: Record<string, unknown>,
+    {
+      isCompaction = false,
+      agentId,
+    }: {
+      isCompaction?: boolean
+      agentId?: string
+    } = {},
+  ): Promise<void> {
+    const event: WorkerEvent = {
+      payload: {
+        type: eventType,
+        ...payload,
+        uuid: typeof payload.uuid === 'string' ? payload.uuid : randomUUID(),
+      } as EventPayload,
+      ...(isCompaction && { is_compaction: true }),
+      ...(agentId && { agent_id: agentId }),
+    }
+    await this.internalEventUploader.enqueue(event)
+  }
+
+  /**
+   * Flush pending internal events. Call between turns and on shutdown
+   * to ensure transcript entries are persisted.
+   */
+  flushInternalEvents(): Promise<void> {
+    return this.internalEventUploader.flush()
+  }
+
+  /**
+   * Flush pending client events (writeEvent queue). Call before close()
+   * when the caller needs delivery confirmation — close() abandons the
+   * queue. Resolves once the uploader drains or rejects; returns
+   * regardless of whether individual POSTs succeeded (check server state
+   * separately if that matters).
+   */
+  async flush(): Promise<void> {
+    await this.flushStreamEventBuffer()
+    return this.eventUploader.flush()
+  }
+
+  /**
+   * Read foreground agent internal events from
+   * GET /sessions/{id}/worker/internal-events.
+   * Returns transcript entries from the last compaction boundary, or null on failure.
+   * Used for session resume.
+   */
+  async readInternalEvents(): Promise<InternalEvent[] | null> {
+    return this.paginatedGet('/worker/internal-events', {}, 'internal_events')
+  }
+
+  /**
+   * Read all subagent internal events from
+   * GET /sessions/{id}/worker/internal-events?subagents=true.
+   * Returns a merged stream across all non-foreground agents, each from its
+   * compaction point. Used for session resume.
+   */
+  async readSubagentInternalEvents(): Promise<InternalEvent[] | null> {
+    return this.paginatedGet(
+      '/worker/internal-events',
+      { subagents: 'true' },
+      'subagent_events',
+    )
+  }
+
+  /**
+   * Paginated GET with retry. Fetches all pages from a list endpoint,
+   * retrying each page on failure with exponential backoff + jitter.
+   */
+  private async paginatedGet(
+    path: string,
+    params: Record<string, string>,
+    context: string,
+  ): Promise<InternalEvent[] | null> {
+    const authHeaders = this.getAuthHeaders()
+    if (Object.keys(authHeaders).length === 0) return null
+
+    const allEvents: InternalEvent[] = []
+    let cursor: string | undefined
+
+    do {
+      const url = new URL(`${this.sessionBaseUrl}${path}`)
+      for (const [k, v] of Object.entries(params)) {
+        url.searchParams.set(k, v)
+      }
+      if (cursor) {
+        url.searchParams.set('cursor', cursor)
+      }
+
+      const page = await this.getWithRetry<ListInternalEventsResponse>(
+        url.toString(),
+        authHeaders,
+        context,
+      )
+      if (!page) return null
+
+      allEvents.push(...(page.data ?? []))
+      cursor = page.next_cursor
+    } while (cursor)
+
+    logForDebugging(
+      `CCRClient: Read ${allEvents.length} internal events from ${path}${params.subagents ? ' (subagents)' : ''}`,
+    )
+    return allEvents
+  }
+
+  /**
+   * Single GET request with retry. Returns the parsed response body
+   * on success, null if all retries are exhausted.
+   */
+  private async getWithRetry<T>(
+    url: string,
+    authHeaders: Record<string, string>,
+    context: string,
+  ): Promise<T | null> {
+    for (let attempt = 1; attempt <= 10; attempt++) {
+      let response
+      try {
+        response = await this.http.get<T>(url, {
+          headers: {
+            ...authHeaders,
+            'anthropic-version': '2023-06-01',
+            'User-Agent': getClaudeCodeUserAgent(),
+          },
+          validateStatus: alwaysValidStatus,
+          timeout: 30_000,
+        })
+      } catch (error) {
+        logForDebugging(
+          `CCRClient: GET ${url} failed (attempt ${attempt}/10): ${errorMessage(error)}`,
+          { level: 'warn' },
+        )
+        if (attempt < 10) {
+          const delay =
+            Math.min(500 * 2 ** (attempt - 1), 30_000) + Math.random() * 500
+          await sleep(delay)
+        }
+        continue
+      }
+
+      if (response.status >= 200 && response.status < 300) {
+        return response.data
+      }
+      if (response.status === 409) {
+        this.handleEpochMismatch()
+      }
+      logForDebugging(
+        `CCRClient: GET ${url} returned ${response.status} (attempt ${attempt}/10)`,
+        { level: 'warn' },
+      )
+
+      if (attempt < 10) {
+        const delay =
+          Math.min(500 * 2 ** (attempt - 1), 30_000) + Math.random() * 500
+        await sleep(delay)
+      }
+    }
+
+    logForDebugging('CCRClient: GET retries exhausted', { level: 'error' })
+    logForDiagnosticsNoPII('error', 'cli_worker_get_retries_exhausted', {
+      context,
+    })
+    return null
+  }
+
+  /**
+   * Report delivery status for a client-to-worker event.
+   * POST /v1/code/sessions/{id}/worker/events/delivery (batch endpoint)
+   */
+  reportDelivery(
+    eventId: string,
+    status: 'received' | 'processing' | 'processed',
+  ): void {
+    void this.deliveryUploader.enqueue({ eventId, status })
+  }
+
+  /** Get the current epoch (for external use). */
+  getWorkerEpoch(): number {
+    return this.workerEpoch
+  }
+
+  /** Internal-event queue depth — shutdown-snapshot backpressure signal. */
+  get internalEventsPending(): number {
+    return this.internalEventUploader.pendingCount
+  }
+
+  /** Clean up uploaders and timers. */
+  close(): void {
+    this.closed = true
+    this.stopHeartbeat()
+    unregisterSessionActivityCallback()
+    if (this.streamEventTimer) {
+      clearTimeout(this.streamEventTimer)
+      this.streamEventTimer = null
+    }
+    this.streamEventBuffer = []
+    this.streamTextAccumulator.byMessage.clear()
+    this.streamTextAccumulator.scopeToMessage.clear()
+    this.workerState.close()
+    this.eventUploader.close()
+    this.internalEventUploader.close()
+    this.deliveryUploader.close()
+  }
+}

+ 45 - 0
src/cli/transports/transportUtils.ts

@@ -0,0 +1,45 @@
+import { URL } from 'url'
+import { isEnvTruthy } from '../../utils/envUtils.js'
+import { HybridTransport } from './HybridTransport.js'
+import { SSETransport } from './SSETransport.js'
+import type { Transport } from './Transport.js'
+import { WebSocketTransport } from './WebSocketTransport.js'
+
+/**
+ * Helper function to get the appropriate transport for a URL.
+ *
+ * Transport selection priority:
+ * 1. SSETransport (SSE reads + POST writes) when CLAUDE_CODE_USE_CCR_V2 is set
+ * 2. HybridTransport (WS reads + POST writes) when CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2 is set
+ * 3. WebSocketTransport (WS reads + WS writes) — default
+ */
+export function getTransportForUrl(
+  url: URL,
+  headers: Record<string, string> = {},
+  sessionId?: string,
+  refreshHeaders?: () => Record<string, string>,
+): Transport {
+  if (isEnvTruthy(process.env.CLAUDE_CODE_USE_CCR_V2)) {
+    // v2: SSE for reads, HTTP POST for writes
+    // --sdk-url is the session URL (.../sessions/{id});
+    // derive the SSE stream URL by appending /worker/events/stream
+    const sseUrl = new URL(url.href)
+    if (sseUrl.protocol === 'wss:') {
+      sseUrl.protocol = 'https:'
+    } else if (sseUrl.protocol === 'ws:') {
+      sseUrl.protocol = 'http:'
+    }
+    sseUrl.pathname =
+      sseUrl.pathname.replace(/\/$/, '') + '/worker/events/stream'
+    return new SSETransport(sseUrl, headers, sessionId, refreshHeaders)
+  }
+
+  if (url.protocol === 'ws:' || url.protocol === 'wss:') {
+    if (isEnvTruthy(process.env.CLAUDE_CODE_POST_FOR_SESSION_INGRESS_V2)) {
+      return new HybridTransport(url, headers, sessionId, refreshHeaders)
+    }
+    return new WebSocketTransport(url, headers, sessionId, refreshHeaders)
+  } else {
+    throw new Error(`Unsupported protocol: ${url.protocol}`)
+  }
+}

+ 422 - 0
src/cli/update.ts

@@ -0,0 +1,422 @@
+import chalk from 'chalk'
+import { logEvent } from 'src/services/analytics/index.js'
+import {
+  getLatestVersion,
+  type InstallStatus,
+  installGlobalPackage,
+} from 'src/utils/autoUpdater.js'
+import { regenerateCompletionCache } from 'src/utils/completionCache.js'
+import {
+  getGlobalConfig,
+  type InstallMethod,
+  saveGlobalConfig,
+} from 'src/utils/config.js'
+import { logForDebugging } from 'src/utils/debug.js'
+import { getDoctorDiagnostic } from 'src/utils/doctorDiagnostic.js'
+import { gracefulShutdown } from 'src/utils/gracefulShutdown.js'
+import {
+  installOrUpdateClaudePackage,
+  localInstallationExists,
+} from 'src/utils/localInstaller.js'
+import {
+  installLatest as installLatestNative,
+  removeInstalledSymlink,
+} from 'src/utils/nativeInstaller/index.js'
+import { getPackageManager } from 'src/utils/nativeInstaller/packageManagers.js'
+import { writeToStdout } from 'src/utils/process.js'
+import { gte } from 'src/utils/semver.js'
+import { getInitialSettings } from 'src/utils/settings/settings.js'
+
/**
 * Implements `claude update`: diagnose the current installation, surface
 * warnings/config mismatches, then update via the matching channel
 * (native installer, npm local, npm global) or print package-manager
 * instructions for managed installs.
 *
 * NOTE(review): the linear control flow assumes gracefulShutdown() never
 * returns (it terminates the process) — e.g. `status` below would be read
 * unassigned after the unsupported-installation branch otherwise. Confirm
 * gracefulShutdown's exit semantics.
 */
export async function update() {
  logEvent('tengu_update_check', {})
  writeToStdout(`Current version: ${MACRO.VERSION}\n`)

  // Release channel from user settings ('latest' unless overridden).
  const channel = getInitialSettings()?.autoUpdatesChannel ?? 'latest'
  writeToStdout(`Checking for updates to ${channel} version...\n`)

  logForDebugging('update: Starting update check')

  // Run diagnostic to detect potential issues
  logForDebugging('update: Running diagnostic')
  const diagnostic = await getDoctorDiagnostic()
  logForDebugging(`update: Installation type: ${diagnostic.installationType}`)
  logForDebugging(
    `update: Config install method: ${diagnostic.configInstallMethod}`,
  )

  // Check for multiple installations
  if (diagnostic.multipleInstallations.length > 1) {
    writeToStdout('\n')
    writeToStdout(chalk.yellow('Warning: Multiple installations found') + '\n')
    for (const install of diagnostic.multipleInstallations) {
      const current =
        diagnostic.installationType === install.type
          ? ' (currently running)'
          : ''
      writeToStdout(`- ${install.type} at ${install.path}${current}\n`)
    }
  }

  // Display warnings if any exist
  if (diagnostic.warnings.length > 0) {
    writeToStdout('\n')
    for (const warning of diagnostic.warnings) {
      logForDebugging(`update: Warning detected: ${warning.issue}`)

      // Don't skip PATH warnings - they're always relevant
      // The user needs to know that 'which claude' points elsewhere
      logForDebugging(`update: Showing warning: ${warning.issue}`)

      writeToStdout(chalk.yellow(`Warning: ${warning.issue}\n`))

      writeToStdout(chalk.bold(`Fix: ${warning.fix}\n`))
    }
  }

  // Update config if installMethod is not set (but skip for package managers)
  const config = getGlobalConfig()
  if (
    !config.installMethod &&
    diagnostic.installationType !== 'package-manager'
  ) {
    writeToStdout('\n')
    writeToStdout('Updating configuration to track installation method...\n')
    let detectedMethod: 'local' | 'native' | 'global' | 'unknown' = 'unknown'

    // Map diagnostic installation type to config install method
    switch (diagnostic.installationType) {
      case 'npm-local':
        detectedMethod = 'local'
        break
      case 'native':
        detectedMethod = 'native'
        break
      case 'npm-global':
        detectedMethod = 'global'
        break
      default:
        detectedMethod = 'unknown'
    }

    saveGlobalConfig(current => ({
      ...current,
      installMethod: detectedMethod,
    }))
    writeToStdout(`Installation method set to: ${detectedMethod}\n`)
  }

  // Check if running from development build
  if (diagnostic.installationType === 'development') {
    writeToStdout('\n')
    writeToStdout(
      chalk.yellow('Warning: Cannot update development build') + '\n',
    )
    await gracefulShutdown(1)
  }

  // Check if running from a package manager
  if (diagnostic.installationType === 'package-manager') {
    const packageManager = await getPackageManager()
    writeToStdout('\n')

    // For managers with a single canonical frontend we print the exact
    // upgrade command; otherwise a generic message (see else below).
    if (packageManager === 'homebrew') {
      writeToStdout('Claude is managed by Homebrew.\n')
      const latest = await getLatestVersion(channel)
      if (latest && !gte(MACRO.VERSION, latest)) {
        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
        writeToStdout('\n')
        writeToStdout('To update, run:\n')
        writeToStdout(chalk.bold('  brew upgrade claude-code') + '\n')
      } else {
        writeToStdout('Claude is up to date!\n')
      }
    } else if (packageManager === 'winget') {
      writeToStdout('Claude is managed by winget.\n')
      const latest = await getLatestVersion(channel)
      if (latest && !gte(MACRO.VERSION, latest)) {
        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
        writeToStdout('\n')
        writeToStdout('To update, run:\n')
        writeToStdout(
          chalk.bold('  winget upgrade Anthropic.ClaudeCode') + '\n',
        )
      } else {
        writeToStdout('Claude is up to date!\n')
      }
    } else if (packageManager === 'apk') {
      writeToStdout('Claude is managed by apk.\n')
      const latest = await getLatestVersion(channel)
      if (latest && !gte(MACRO.VERSION, latest)) {
        writeToStdout(`Update available: ${MACRO.VERSION} → ${latest}\n`)
        writeToStdout('\n')
        writeToStdout('To update, run:\n')
        writeToStdout(chalk.bold('  apk upgrade claude-code') + '\n')
      } else {
        writeToStdout('Claude is up to date!\n')
      }
    } else {
      // pacman, deb, and rpm don't get specific commands because they each have
      // multiple frontends (pacman: yay/paru/makepkg, deb: apt/apt-get/aptitude/nala,
      // rpm: dnf/yum/zypper)
      writeToStdout('Claude is managed by a package manager.\n')
      writeToStdout('Please use your package manager to update.\n')
    }

    await gracefulShutdown(0)
  }

  // Check for config/reality mismatch (skip for package-manager installs)
  if (
    config.installMethod &&
    diagnostic.configInstallMethod !== 'not set' &&
    diagnostic.installationType !== 'package-manager'
  ) {
    const runningType = diagnostic.installationType
    const configExpects = diagnostic.configInstallMethod

    // Map installation types for comparison
    const typeMapping: Record<string, string> = {
      'npm-local': 'local',
      'npm-global': 'global',
      native: 'native',
      development: 'development',
      unknown: 'unknown',
    }

    const normalizedRunningType = typeMapping[runningType] || runningType

    if (
      normalizedRunningType !== configExpects &&
      configExpects !== 'unknown'
    ) {
      writeToStdout('\n')
      writeToStdout(chalk.yellow('Warning: Configuration mismatch') + '\n')
      writeToStdout(`Config expects: ${configExpects} installation\n`)
      writeToStdout(`Currently running: ${runningType}\n`)
      writeToStdout(
        chalk.yellow(
          `Updating the ${runningType} installation you are currently using`,
        ) + '\n',
      )

      // Update config to match reality
      saveGlobalConfig(current => ({
        ...current,
        installMethod: normalizedRunningType as InstallMethod,
      }))
      writeToStdout(
        `Config updated to reflect current installation method: ${normalizedRunningType}\n`,
      )
    }
  }

  // Handle native installation updates first
  if (diagnostic.installationType === 'native') {
    logForDebugging(
      'update: Detected native installation, using native updater',
    )
    try {
      const result = await installLatestNative(channel, true)

      // Handle lock contention gracefully
      if (result.lockFailed) {
        const pidInfo = result.lockHolderPid
          ? ` (PID ${result.lockHolderPid})`
          : ''
        writeToStdout(
          chalk.yellow(
            `Another Claude process${pidInfo} is currently running. Please try again in a moment.`,
          ) + '\n',
        )
        await gracefulShutdown(0)
      }

      if (!result.latestVersion) {
        process.stderr.write('Failed to check for updates\n')
        await gracefulShutdown(1)
      }

      if (result.latestVersion === MACRO.VERSION) {
        writeToStdout(
          chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
        )
      } else {
        writeToStdout(
          chalk.green(
            `Successfully updated from ${MACRO.VERSION} to version ${result.latestVersion}`,
          ) + '\n',
        )
        // Shell completions reference the installed version's commands.
        await regenerateCompletionCache()
      }
      await gracefulShutdown(0)
    } catch (error) {
      process.stderr.write('Error: Failed to install native update\n')
      process.stderr.write(String(error) + '\n')
      process.stderr.write('Try running "claude doctor" for diagnostics\n')
      await gracefulShutdown(1)
    }
  }

  // Fallback to existing JS/npm-based update logic
  // Remove native installer symlink since we're not using native installation
  // But only if user hasn't migrated to native installation
  if (config.installMethod !== 'native') {
    await removeInstalledSymlink()
  }

  logForDebugging('update: Checking npm registry for latest version')
  logForDebugging(`update: Package URL: ${MACRO.PACKAGE_URL}`)
  const npmTag = channel === 'stable' ? 'stable' : 'latest'
  const npmCommand = `npm view ${MACRO.PACKAGE_URL}@${npmTag} version`
  logForDebugging(`update: Running: ${npmCommand}`)
  const latestVersion = await getLatestVersion(channel)
  logForDebugging(
    `update: Latest version from npm: ${latestVersion || 'FAILED'}`,
  )

  if (!latestVersion) {
    logForDebugging('update: Failed to get latest version from npm registry')
    process.stderr.write(chalk.red('Failed to check for updates') + '\n')
    process.stderr.write('Unable to fetch latest version from npm registry\n')
    process.stderr.write('\n')
    process.stderr.write('Possible causes:\n')
    process.stderr.write('  • Network connectivity issues\n')
    process.stderr.write('  • npm registry is unreachable\n')
    process.stderr.write('  • Corporate proxy/firewall blocking npm\n')
    if (MACRO.PACKAGE_URL && !MACRO.PACKAGE_URL.startsWith('@anthropic')) {
      process.stderr.write(
        '  • Internal/development build not published to npm\n',
      )
    }
    process.stderr.write('\n')
    process.stderr.write('Try:\n')
    process.stderr.write('  • Check your internet connection\n')
    process.stderr.write('  • Run with --debug flag for more details\n')
    const packageName =
      MACRO.PACKAGE_URL ||
      (process.env.USER_TYPE === 'ant'
        ? '@anthropic-ai/claude-cli'
        : '@anthropic-ai/claude-code')
    process.stderr.write(
      `  • Manually check: npm view ${packageName} version\n`,
    )

    process.stderr.write('  • Check if you need to login: npm whoami\n')
    await gracefulShutdown(1)
  }

  // Check if versions match exactly, including any build metadata (like SHA)
  if (latestVersion === MACRO.VERSION) {
    writeToStdout(
      chalk.green(`Claude Code is up to date (${MACRO.VERSION})`) + '\n',
    )
    await gracefulShutdown(0)
  }

  writeToStdout(
    `New version available: ${latestVersion} (current: ${MACRO.VERSION})\n`,
  )
  writeToStdout('Installing update...\n')

  // Determine update method based on what's actually running
  let useLocalUpdate = false
  let updateMethodName = ''

  switch (diagnostic.installationType) {
    case 'npm-local':
      useLocalUpdate = true
      updateMethodName = 'local'
      break
    case 'npm-global':
      useLocalUpdate = false
      updateMethodName = 'global'
      break
    case 'unknown': {
      // Fallback to detection if we can't determine installation type
      const isLocal = await localInstallationExists()
      useLocalUpdate = isLocal
      updateMethodName = isLocal ? 'local' : 'global'
      writeToStdout(
        chalk.yellow('Warning: Could not determine installation type') + '\n',
      )
      writeToStdout(
        `Attempting ${updateMethodName} update based on file detection...\n`,
      )
      break
    }
    default:
      // NOTE(review): assumes gracefulShutdown(1) exits; otherwise the
      // switch below would read `status` before assignment — confirm.
      process.stderr.write(
        `Error: Cannot update ${diagnostic.installationType} installation\n`,
      )
      await gracefulShutdown(1)
  }

  writeToStdout(`Using ${updateMethodName} installation update method...\n`)

  logForDebugging(`update: Update method determined: ${updateMethodName}`)
  logForDebugging(`update: useLocalUpdate: ${useLocalUpdate}`)

  let status: InstallStatus

  if (useLocalUpdate) {
    logForDebugging(
      'update: Calling installOrUpdateClaudePackage() for local update',
    )
    status = await installOrUpdateClaudePackage(channel)
  } else {
    logForDebugging('update: Calling installGlobalPackage() for global update')
    status = await installGlobalPackage()
  }

  logForDebugging(`update: Installation status: ${status}`)

  switch (status) {
    case 'success':
      writeToStdout(
        chalk.green(
          `Successfully updated from ${MACRO.VERSION} to version ${latestVersion}`,
        ) + '\n',
      )
      await regenerateCompletionCache()
      break
    case 'no_permissions':
      process.stderr.write(
        'Error: Insufficient permissions to install update\n',
      )
      if (useLocalUpdate) {
        process.stderr.write('Try manually updating with:\n')
        process.stderr.write(
          `  cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
        )
      } else {
        process.stderr.write('Try running with sudo or fix npm permissions\n')
        process.stderr.write(
          'Or consider using native installation with: claude install\n',
        )
      }
      await gracefulShutdown(1)
      break
    case 'install_failed':
      process.stderr.write('Error: Failed to install update\n')
      if (useLocalUpdate) {
        process.stderr.write('Try manually updating with:\n')
        process.stderr.write(
          `  cd ~/.claude/local && npm update ${MACRO.PACKAGE_URL}\n`,
        )
      } else {
        process.stderr.write(
          'Or consider using native installation with: claude install\n',
        )
      }
      await gracefulShutdown(1)
      break
    case 'in_progress':
      process.stderr.write(
        'Error: Another instance is currently performing an update\n',
      )
      process.stderr.write('Please wait and try again later\n')
      await gracefulShutdown(1)
      break
  }
  await gracefulShutdown(0)
}

+ 754 - 0
src/commands.ts

@@ -0,0 +1,754 @@
+// biome-ignore-all assist/source/organizeImports: ANT-ONLY import markers must not be reordered
+import addDir from './commands/add-dir/index.js'
+import autofixPr from './commands/autofix-pr/index.js'
+import backfillSessions from './commands/backfill-sessions/index.js'
+import btw from './commands/btw/index.js'
+import goodClaude from './commands/good-claude/index.js'
+import issue from './commands/issue/index.js'
+import feedback from './commands/feedback/index.js'
+import clear from './commands/clear/index.js'
+import color from './commands/color/index.js'
+import commit from './commands/commit.js'
+import copy from './commands/copy/index.js'
+import desktop from './commands/desktop/index.js'
+import commitPushPr from './commands/commit-push-pr.js'
+import compact from './commands/compact/index.js'
+import config from './commands/config/index.js'
+import { context, contextNonInteractive } from './commands/context/index.js'
+import cost from './commands/cost/index.js'
+import diff from './commands/diff/index.js'
+import ctx_viz from './commands/ctx_viz/index.js'
+import doctor from './commands/doctor/index.js'
+import memory from './commands/memory/index.js'
+import help from './commands/help/index.js'
+import ide from './commands/ide/index.js'
+import init from './commands/init.js'
+import initVerifiers from './commands/init-verifiers.js'
+import keybindings from './commands/keybindings/index.js'
+import login from './commands/login/index.js'
+import logout from './commands/logout/index.js'
+import installGitHubApp from './commands/install-github-app/index.js'
+import installSlackApp from './commands/install-slack-app/index.js'
+import breakCache from './commands/break-cache/index.js'
+import mcp from './commands/mcp/index.js'
+import mobile from './commands/mobile/index.js'
+import onboarding from './commands/onboarding/index.js'
+import pr_comments from './commands/pr_comments/index.js'
+import releaseNotes from './commands/release-notes/index.js'
+import rename from './commands/rename/index.js'
+import resume from './commands/resume/index.js'
+import review, { ultrareview } from './commands/review.js'
+import session from './commands/session/index.js'
+import share from './commands/share/index.js'
+import skills from './commands/skills/index.js'
+import status from './commands/status/index.js'
+import tasks from './commands/tasks/index.js'
+import teleport from './commands/teleport/index.js'
+/* eslint-disable @typescript-eslint/no-require-imports */
+const agentsPlatform =
+  process.env.USER_TYPE === 'ant'
+    ? require('./commands/agents-platform/index.js').default
+    : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+import securityReview from './commands/security-review.js'
+import bughunter from './commands/bughunter/index.js'
+import terminalSetup from './commands/terminalSetup/index.js'
+import usage from './commands/usage/index.js'
+import theme from './commands/theme/index.js'
+import vim from './commands/vim/index.js'
+import { feature } from 'bun:bundle'
+// Dead code elimination: conditional imports
+/* eslint-disable @typescript-eslint/no-require-imports */
+const proactive =
+  feature('PROACTIVE') || feature('KAIROS')
+    ? require('./commands/proactive.js').default
+    : null
+const briefCommand =
+  feature('KAIROS') || feature('KAIROS_BRIEF')
+    ? require('./commands/brief.js').default
+    : null
+const assistantCommand = feature('KAIROS')
+  ? require('./commands/assistant/index.js').default
+  : null
+const bridge = feature('BRIDGE_MODE')
+  ? require('./commands/bridge/index.js').default
+  : null
+const remoteControlServerCommand =
+  feature('DAEMON') && feature('BRIDGE_MODE')
+    ? require('./commands/remoteControlServer/index.js').default
+    : null
+const voiceCommand = feature('VOICE_MODE')
+  ? require('./commands/voice/index.js').default
+  : null
+const forceSnip = feature('HISTORY_SNIP')
+  ? require('./commands/force-snip.js').default
+  : null
+const workflowsCmd = feature('WORKFLOW_SCRIPTS')
+  ? (
+      require('./commands/workflows/index.js') as typeof import('./commands/workflows/index.js')
+    ).default
+  : null
+const webCmd = feature('CCR_REMOTE_SETUP')
+  ? (
+      require('./commands/remote-setup/index.js') as typeof import('./commands/remote-setup/index.js')
+    ).default
+  : null
+const clearSkillIndexCache = feature('EXPERIMENTAL_SKILL_SEARCH')
+  ? (
+      require('./services/skillSearch/localSearch.js') as typeof import('./services/skillSearch/localSearch.js')
+    ).clearSkillIndexCache
+  : null
+const subscribePr = feature('KAIROS_GITHUB_WEBHOOKS')
+  ? require('./commands/subscribe-pr.js').default
+  : null
+const ultraplan = feature('ULTRAPLAN')
+  ? require('./commands/ultraplan.js').default
+  : null
+const torch = feature('TORCH') ? require('./commands/torch.js').default : null
+const peersCmd = feature('UDS_INBOX')
+  ? (
+      require('./commands/peers/index.js') as typeof import('./commands/peers/index.js')
+    ).default
+  : null
+const forkCmd = feature('FORK_SUBAGENT')
+  ? (
+      require('./commands/fork/index.js') as typeof import('./commands/fork/index.js')
+    ).default
+  : null
+const buddy = feature('BUDDY')
+  ? (
+      require('./commands/buddy/index.js') as typeof import('./commands/buddy/index.js')
+    ).default
+  : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+import thinkback from './commands/thinkback/index.js'
+import thinkbackPlay from './commands/thinkback-play/index.js'
+import permissions from './commands/permissions/index.js'
+import plan from './commands/plan/index.js'
+import fast from './commands/fast/index.js'
+import passes from './commands/passes/index.js'
+import privacySettings from './commands/privacy-settings/index.js'
+import hooks from './commands/hooks/index.js'
+import files from './commands/files/index.js'
+import branch from './commands/branch/index.js'
+import agents from './commands/agents/index.js'
+import plugin from './commands/plugin/index.js'
+import reloadPlugins from './commands/reload-plugins/index.js'
+import rewind from './commands/rewind/index.js'
+import heapDump from './commands/heapdump/index.js'
+import mockLimits from './commands/mock-limits/index.js'
+import bridgeKick from './commands/bridge-kick.js'
+import version from './commands/version.js'
+import summary from './commands/summary/index.js'
+import {
+  resetLimits,
+  resetLimitsNonInteractive,
+} from './commands/reset-limits/index.js'
+import antTrace from './commands/ant-trace/index.js'
+import perfIssue from './commands/perf-issue/index.js'
+import sandboxToggle from './commands/sandbox-toggle/index.js'
+import chrome from './commands/chrome/index.js'
+import stickers from './commands/stickers/index.js'
+import advisor from './commands/advisor.js'
+import { logError } from './utils/log.js'
+import { toError } from './utils/errors.js'
+import { logForDebugging } from './utils/debug.js'
+import {
+  getSkillDirCommands,
+  clearSkillCaches,
+  getDynamicSkills,
+} from './skills/loadSkillsDir.js'
+import { getBundledSkills } from './skills/bundledSkills.js'
+import { getBuiltinPluginSkillCommands } from './plugins/builtinPlugins.js'
+import {
+  getPluginCommands,
+  clearPluginCommandCache,
+  getPluginSkills,
+  clearPluginSkillsCache,
+} from './utils/plugins/loadPluginCommands.js'
+import memoize from 'lodash-es/memoize.js'
+import { isUsing3PServices, isClaudeAISubscriber } from './utils/auth.js'
+import { isFirstPartyAnthropicBaseUrl } from './utils/model/providers.js'
+import env from './commands/env/index.js'
+import exit from './commands/exit/index.js'
+import exportCommand from './commands/export/index.js'
+import model from './commands/model/index.js'
+import tag from './commands/tag/index.js'
+import outputStyle from './commands/output-style/index.js'
+import remoteEnv from './commands/remote-env/index.js'
+import upgrade from './commands/upgrade/index.js'
+import {
+  extraUsage,
+  extraUsageNonInteractive,
+} from './commands/extra-usage/index.js'
+import rateLimitOptions from './commands/rate-limit-options/index.js'
+import statusline from './commands/statusline.js'
+import effort from './commands/effort/index.js'
+import stats from './commands/stats/index.js'
+// insights.ts is 113KB (3200 lines, includes diffLines/html rendering). Lazy
+// shim defers the heavy module until /insights is actually invoked.
const usageReport: Command = {
  type: 'prompt',
  name: 'insights',
  description: 'Generate a report analyzing your Claude Code sessions',
  contentLength: 0,
  progressMessage: 'analyzing your sessions',
  source: 'builtin',
  // Defers loading the heavy insights module until /insights is invoked.
  async getPromptForCommand(args, context) {
    const real = (await import('./commands/insights.js')).default
    // Type narrowing for TS; the real command is always prompt-type.
    if (real.type !== 'prompt') throw new Error('unreachable')
    return real.getPromptForCommand(args, context)
  },
}
+import oauthRefresh from './commands/oauth-refresh/index.js'
+import debugToolCall from './commands/debug-tool-call/index.js'
+import { getSettingSourceName } from './utils/settings/constants.js'
+import {
+  type Command,
+  getCommandName,
+  isCommandEnabled,
+} from './types/command.js'
+
+// Re-export types from the centralized location
+export type {
+  Command,
+  CommandBase,
+  CommandResultDisplay,
+  LocalCommandResult,
+  LocalJSXCommandContext,
+  PromptCommand,
+  ResumeEntrypoint,
+} from './types/command.js'
+export { getCommandName, isCommandEnabled } from './types/command.js'
+
// Commands that get eliminated from the external build.
// Only registered when USER_TYPE === 'ant' and not a demo — see COMMANDS().
export const INTERNAL_ONLY_COMMANDS = [
  backfillSessions,
  breakCache,
  bughunter,
  commit,
  commitPushPr,
  ctx_viz,
  goodClaude,
  issue,
  initVerifiers,
  // Feature-gated entries are null when their flag is off; conditional
  // spreads keep the array hole-free.
  ...(forceSnip ? [forceSnip] : []),
  mockLimits,
  bridgeKick,
  version,
  ...(ultraplan ? [ultraplan] : []),
  ...(subscribePr ? [subscribePr] : []),
  resetLimits,
  resetLimitsNonInteractive,
  onboarding,
  share,
  summary,
  teleport,
  antTrace,
  perfIssue,
  env,
  oauthRefresh,
  debugToolCall,
  // agentsPlatform is null outside ant builds; filter(Boolean) drops it.
  agentsPlatform,
  autofixPr,
].filter(Boolean)
+
// Declared as a function so that we don't run this until getCommands is called,
// since underlying functions read from config, which can't be read at module initialization time
const COMMANDS = memoize((): Command[] => [
  addDir,
  advisor,
  agents,
  branch,
  btw,
  chrome,
  clear,
  color,
  compact,
  config,
  copy,
  desktop,
  context,
  contextNonInteractive,
  cost,
  diff,
  doctor,
  effort,
  exit,
  fast,
  files,
  heapDump,
  help,
  ide,
  init,
  keybindings,
  installGitHubApp,
  installSlackApp,
  mcp,
  memory,
  mobile,
  model,
  outputStyle,
  remoteEnv,
  plugin,
  pr_comments,
  releaseNotes,
  reloadPlugins,
  rename,
  resume,
  session,
  skills,
  stats,
  status,
  statusline,
  stickers,
  tag,
  theme,
  feedback,
  review,
  ultrareview,
  rewind,
  securityReview,
  terminalSetup,
  upgrade,
  extraUsage,
  extraUsageNonInteractive,
  rateLimitOptions,
  usage,
  usageReport,
  vim,
  // Feature-flagged commands are null when their flag is off; conditional
  // spreads keep the list hole-free.
  ...(webCmd ? [webCmd] : []),
  ...(forkCmd ? [forkCmd] : []),
  ...(buddy ? [buddy] : []),
  ...(proactive ? [proactive] : []),
  ...(briefCommand ? [briefCommand] : []),
  ...(assistantCommand ? [assistantCommand] : []),
  ...(bridge ? [bridge] : []),
  ...(remoteControlServerCommand ? [remoteControlServerCommand] : []),
  ...(voiceCommand ? [voiceCommand] : []),
  thinkback,
  thinkbackPlay,
  permissions,
  plan,
  privacySettings,
  hooks,
  exportCommand,
  sandboxToggle,
  // login/logout are only registered for first-party auth users.
  ...(!isUsing3PServices() ? [logout, login()] : []),
  passes,
  ...(peersCmd ? [peersCmd] : []),
  tasks,
  ...(workflowsCmd ? [workflowsCmd] : []),
  ...(torch ? [torch] : []),
  // Internal-only commands for ant users (excluded in demos).
  ...(process.env.USER_TYPE === 'ant' && !process.env.IS_DEMO
    ? INTERNAL_ONLY_COMMANDS
    : []),
])
+
+export const builtInCommandNames = memoize(
+  (): Set<string> =>
+    new Set(COMMANDS().flatMap(_ => [_.name, ...(_.aliases ?? [])])),
+)
+
+async function getSkills(cwd: string): Promise<{
+  skillDirCommands: Command[]
+  pluginSkills: Command[]
+  bundledSkills: Command[]
+  builtinPluginSkills: Command[]
+}> {
+  try {
+    const [skillDirCommands, pluginSkills] = await Promise.all([
+      getSkillDirCommands(cwd).catch(err => {
+        logError(toError(err))
+        logForDebugging(
+          'Skill directory commands failed to load, continuing without them',
+        )
+        return []
+      }),
+      getPluginSkills().catch(err => {
+        logError(toError(err))
+        logForDebugging('Plugin skills failed to load, continuing without them')
+        return []
+      }),
+    ])
+    // Bundled skills are registered synchronously at startup
+    const bundledSkills = getBundledSkills()
+    // Built-in plugin skills come from enabled built-in plugins
+    const builtinPluginSkills = getBuiltinPluginSkillCommands()
+    logForDebugging(
+      `getSkills returning: ${skillDirCommands.length} skill dir commands, ${pluginSkills.length} plugin skills, ${bundledSkills.length} bundled skills, ${builtinPluginSkills.length} builtin plugin skills`,
+    )
+    return {
+      skillDirCommands,
+      pluginSkills,
+      bundledSkills,
+      builtinPluginSkills,
+    }
+  } catch (err) {
+    // This should never happen since we catch at the Promise level, but defensive
+    logError(toError(err))
+    logForDebugging('Unexpected error in getSkills, returning empty')
+    return {
+      skillDirCommands: [],
+      pluginSkills: [],
+      bundledSkills: [],
+      builtinPluginSkills: [],
+    }
+  }
+}
+
+/* eslint-disable @typescript-eslint/no-require-imports */
+const getWorkflowCommands = feature('WORKFLOW_SCRIPTS')
+  ? (
+      require('./tools/WorkflowTool/createWorkflowCommand.js') as typeof import('./tools/WorkflowTool/createWorkflowCommand.js')
+    ).getWorkflowCommands
+  : null
+/* eslint-enable @typescript-eslint/no-require-imports */
+
+/**
+ * Filters commands by their declared `availability` (auth/provider requirement).
+ * Commands without `availability` are treated as universal.
+ * This runs before `isEnabled()` so that provider-gated commands are hidden
+ * regardless of feature-flag state.
+ *
+ * Not memoized — auth state can change mid-session (e.g. after /login),
+ * so this must be re-evaluated on every getCommands() call.
+ */
+export function meetsAvailabilityRequirement(cmd: Command): boolean {
+  if (!cmd.availability) return true
+  for (const a of cmd.availability) {
+    switch (a) {
+      case 'claude-ai':
+        if (isClaudeAISubscriber()) return true
+        break
+      case 'console':
+        // Console API key user = direct 1P API customer (not 3P, not claude.ai).
+        // Excludes 3P (Bedrock/Vertex/Foundry) who don't set ANTHROPIC_BASE_URL
+        // and gateway users who proxy through a custom base URL.
+        if (
+          !isClaudeAISubscriber() &&
+          !isUsing3PServices() &&
+          isFirstPartyAnthropicBaseUrl()
+        )
+          return true
+        break
+      default: {
+        const _exhaustive: never = a
+        void _exhaustive
+        break
+      }
+    }
+  }
+  return false
+}
+
+/**
+ * Loads all command sources (skills, plugins, workflows). Memoized by cwd
+ * because loading is expensive (disk I/O, dynamic imports).
+ */
+const loadAllCommands = memoize(async (cwd: string): Promise<Command[]> => {
+  const [
+    { skillDirCommands, pluginSkills, bundledSkills, builtinPluginSkills },
+    pluginCommands,
+    workflowCommands,
+  ] = await Promise.all([
+    getSkills(cwd),
+    getPluginCommands(),
+    getWorkflowCommands ? getWorkflowCommands(cwd) : Promise.resolve([]),
+  ])
+
+  return [
+    ...bundledSkills,
+    ...builtinPluginSkills,
+    ...skillDirCommands,
+    ...workflowCommands,
+    ...pluginCommands,
+    ...pluginSkills,
+    ...COMMANDS(),
+  ]
+})
+
+/**
+ * Returns commands available to the current user. The expensive loading is
+ * memoized, but availability and isEnabled checks run fresh every call so
+ * auth changes (e.g. /login) take effect immediately.
+ */
+export async function getCommands(cwd: string): Promise<Command[]> {
+  const allCommands = await loadAllCommands(cwd)
+
+  // Get dynamic skills discovered during file operations
+  const dynamicSkills = getDynamicSkills()
+
+  // Build base commands without dynamic skills
+  const baseCommands = allCommands.filter(
+    _ => meetsAvailabilityRequirement(_) && isCommandEnabled(_),
+  )
+
+  if (dynamicSkills.length === 0) {
+    return baseCommands
+  }
+
+  // Dedupe dynamic skills - only add if not already present
+  const baseCommandNames = new Set(baseCommands.map(c => c.name))
+  const uniqueDynamicSkills = dynamicSkills.filter(
+    s =>
+      !baseCommandNames.has(s.name) &&
+      meetsAvailabilityRequirement(s) &&
+      isCommandEnabled(s),
+  )
+
+  if (uniqueDynamicSkills.length === 0) {
+    return baseCommands
+  }
+
+  // Insert dynamic skills after plugin skills but before built-in commands
+  const builtInNames = new Set(COMMANDS().map(c => c.name))
+  const insertIndex = baseCommands.findIndex(c => builtInNames.has(c.name))
+
+  if (insertIndex === -1) {
+    return [...baseCommands, ...uniqueDynamicSkills]
+  }
+
+  return [
+    ...baseCommands.slice(0, insertIndex),
+    ...uniqueDynamicSkills,
+    ...baseCommands.slice(insertIndex),
+  ]
+}
+
+/**
+ * Clears only the memoization caches for commands, WITHOUT clearing skill caches.
+ * Use this when dynamic skills are added to invalidate cached command lists.
+ */
+export function clearCommandMemoizationCaches(): void {
+  loadAllCommands.cache?.clear?.()
+  getSkillToolCommands.cache?.clear?.()
+  getSlashCommandToolSkills.cache?.clear?.()
+  // getSkillIndex in skillSearch/localSearch.ts is a separate memoization layer
+  // built ON TOP of getSkillToolCommands/getCommands. Clearing only the inner
+  // caches is a no-op for the outer — lodash memoize returns the cached result
+  // without ever reaching the cleared inners. Must clear it explicitly.
+  clearSkillIndexCache?.()
+}
+
// Full cache reset: memoized command lists plus the underlying plugin and
// skill caches. Use when the command sources themselves may have changed.
export function clearCommandsCache(): void {
  clearCommandMemoizationCaches()
  clearPluginCommandCache()
  clearPluginSkillsCache()
  clearSkillCaches()
}
+
+/**
+ * Filter AppState.mcp.commands to MCP-provided skills (prompt-type,
+ * model-invocable, loaded from MCP). These live outside getCommands() so
+ * callers that need MCP skills in their skill index thread them through
+ * separately.
+ */
+export function getMcpSkillCommands(
+  mcpCommands: readonly Command[],
+): readonly Command[] {
+  if (feature('MCP_SKILLS')) {
+    return mcpCommands.filter(
+      cmd =>
+        cmd.type === 'prompt' &&
+        cmd.loadedFrom === 'mcp' &&
+        !cmd.disableModelInvocation,
+    )
+  }
+  return []
+}
+
+// SkillTool shows ALL prompt-based commands that the model can invoke
+// This includes both skills (from /skills/) and commands (from /commands/)
+export const getSkillToolCommands = memoize(
+  async (cwd: string): Promise<Command[]> => {
+    const allCommands = await getCommands(cwd)
+    return allCommands.filter(
+      cmd =>
+        cmd.type === 'prompt' &&
+        !cmd.disableModelInvocation &&
+        cmd.source !== 'builtin' &&
+        // Always include skills from /skills/ dirs, bundled skills, and legacy /commands/ entries
+        // (they all get an auto-derived description from the first line if frontmatter is missing).
+        // Plugin/MCP commands still require an explicit description to appear in the listing.
+        (cmd.loadedFrom === 'bundled' ||
+          cmd.loadedFrom === 'skills' ||
+          cmd.loadedFrom === 'commands_DEPRECATED' ||
+          cmd.hasUserSpecifiedDescription ||
+          cmd.whenToUse),
+    )
+  },
+)
+
+// Filters commands to include only skills. Skills are commands that provide
+// specialized capabilities for the model to use. They are identified by
+// loadedFrom being 'skills', 'plugin', or 'bundled', or having disableModelInvocation set.
+export const getSlashCommandToolSkills = memoize(
+  async (cwd: string): Promise<Command[]> => {
+    try {
+      const allCommands = await getCommands(cwd)
+      return allCommands.filter(
+        cmd =>
+          cmd.type === 'prompt' &&
+          cmd.source !== 'builtin' &&
+          (cmd.hasUserSpecifiedDescription || cmd.whenToUse) &&
+          (cmd.loadedFrom === 'skills' ||
+            cmd.loadedFrom === 'plugin' ||
+            cmd.loadedFrom === 'bundled' ||
+            cmd.disableModelInvocation),
+      )
+    } catch (error) {
+      logError(toError(error))
+      // Return empty array rather than throwing - skills are non-critical
+      // This prevents skill loading failures from breaking the entire system
+      logForDebugging('Returning empty skills array due to load failure')
+      return []
+    }
+  },
+)
+
/**
 * Commands that are safe to use in remote mode (--remote).
 * These only affect local TUI state and don't depend on local filesystem,
 * git, shell, IDE, MCP, or other local execution context.
 *
 * Used in two places:
 * 1. Pre-filtering commands in main.tsx before REPL renders (prevents race with CCR init)
 * 2. Preserving local-only commands in REPL's handleRemoteInit after CCR filters
 *
 * Membership is by object identity (the imported command instances), so
 * filterCommandsForRemoteMode must be given those same instances.
 */
export const REMOTE_SAFE_COMMANDS: Set<Command> = new Set([
  session, // Shows QR code / URL for remote session
  exit, // Exit the TUI
  clear, // Clear screen
  help, // Show help
  theme, // Change terminal theme
  color, // Change agent color
  vim, // Toggle vim mode
  cost, // Show session cost (local cost tracking)
  usage, // Show usage info
  copy, // Copy last message
  btw, // Quick note
  feedback, // Send feedback
  plan, // Plan mode toggle
  keybindings, // Keybinding management
  statusline, // Status line toggle
  stickers, // Stickers
  mobile, // Mobile QR code
])
+
+/**
+ * Builtin commands of type 'local' that ARE safe to execute when received
+ * over the Remote Control bridge. These produce text output that streams
+ * back to the mobile/web client and have no terminal-only side effects.
+ *
+ * 'local-jsx' commands are blocked by type (they render Ink UI) and
+ * 'prompt' commands are allowed by type (they expand to text sent to the
+ * model) — this set only gates 'local' commands.
+ *
+ * When adding a new 'local' command that should work from mobile, add it
+ * here. Default is blocked.
+ */
+export const BRIDGE_SAFE_COMMANDS: Set<Command> = new Set(
+  [
+    compact, // Shrink context — useful mid-session from a phone
+    clear, // Wipe transcript
+    cost, // Show session cost
+    summary, // Summarize conversation
+    releaseNotes, // Show changelog
+    files, // List tracked files
+  ].filter((c): c is Command => c !== null),
+)
+
+/**
+ * Whether a slash command is safe to execute when its input arrived over the
+ * Remote Control bridge (mobile/web client).
+ *
+ * PR #19134 blanket-blocked all slash commands from bridge inbound because
+ * `/model` from iOS was popping the local Ink picker. This predicate relaxes
+ * that with an explicit allowlist: 'prompt' commands (skills) expand to text
+ * and are safe by construction; 'local' commands need an explicit opt-in via
+ * BRIDGE_SAFE_COMMANDS; 'local-jsx' commands render Ink UI and stay blocked.
+ */
+export function isBridgeSafeCommand(cmd: Command): boolean {
+  if (cmd.type === 'local-jsx') return false
+  if (cmd.type === 'prompt') return true
+  return BRIDGE_SAFE_COMMANDS.has(cmd)
+}
+
+/**
+ * Filter commands to only include those safe for remote mode.
+ * Used to pre-filter commands when rendering the REPL in --remote mode,
+ * preventing local-only commands from being briefly available before
+ * the CCR init message arrives.
+ */
+export function filterCommandsForRemoteMode(commands: Command[]): Command[] {
+  return commands.filter(cmd => REMOTE_SAFE_COMMANDS.has(cmd))
+}
+
+export function findCommand(
+  commandName: string,
+  commands: Command[],
+): Command | undefined {
+  return commands.find(
+    _ =>
+      _.name === commandName ||
+      getCommandName(_) === commandName ||
+      _.aliases?.includes(commandName),
+  )
+}
+
+export function hasCommand(commandName: string, commands: Command[]): boolean {
+  return findCommand(commandName, commands) !== undefined
+}
+
+export function getCommand(commandName: string, commands: Command[]): Command {
+  const command = findCommand(commandName, commands)
+  if (!command) {
+    throw ReferenceError(
+      `Command ${commandName} not found. Available commands: ${commands
+        .map(_ => {
+          const name = getCommandName(_)
+          return _.aliases ? `${name} (aliases: ${_.aliases.join(', ')})` : name
+        })
+        .sort((a, b) => a.localeCompare(b))
+        .join(', ')}`,
+    )
+  }
+
+  return command
+}
+
+/**
+ * Formats a command's description with its source annotation for user-facing UI.
+ * Use this in typeahead, help screens, and other places where users need to see
+ * where a command comes from.
+ *
+ * For model-facing prompts (like SkillTool), use cmd.description directly.
+ */
+export function formatDescriptionWithSource(cmd: Command): string {
+  if (cmd.type !== 'prompt') {
+    return cmd.description
+  }
+
+  if (cmd.kind === 'workflow') {
+    return `${cmd.description} (workflow)`
+  }
+
+  if (cmd.source === 'plugin') {
+    const pluginName = cmd.pluginInfo?.pluginManifest.name
+    if (pluginName) {
+      return `(${pluginName}) ${cmd.description}`
+    }
+    return `${cmd.description} (plugin)`
+  }
+
+  if (cmd.source === 'builtin' || cmd.source === 'mcp') {
+    return cmd.description
+  }
+
+  if (cmd.source === 'bundled') {
+    return `${cmd.description} (bundled)`
+  }
+
+  return `${cmd.description} (${getSettingSourceName(cmd.source)})`
+}

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 125 - 0
src/commands/add-dir/add-dir.tsx


+ 11 - 0
src/commands/add-dir/index.ts

@@ -0,0 +1,11 @@
+import type { Command } from '../../commands.js'
+
// The /add-dir command: adds a directory to the session's working set.
const addDir = {
  type: 'local-jsx',
  name: 'add-dir',
  description: 'Add a new working directory',
  argumentHint: '<path>',
  // Lazily loads the Ink UI implementation only when invoked.
  load: () => import('./add-dir.js'),
} satisfies Command

export default addDir

+ 110 - 0
src/commands/add-dir/validation.ts

@@ -0,0 +1,110 @@
+import chalk from 'chalk'
+import { stat } from 'fs/promises'
+import { dirname, resolve } from 'path'
+import type { ToolPermissionContext } from '../../Tool.js'
+import { getErrnoCode } from '../../utils/errors.js'
+import { expandPath } from '../../utils/path.js'
+import {
+  allWorkingDirectories,
+  pathInWorkingPath,
+} from '../../utils/permissions/filesystem.js'
+
// Discriminated union describing the outcome of validating a directory for
// addition to the workspace; `resultType` is the discriminant.
export type AddDirectoryResult =
  | {
      // The directory exists and is not already covered by a working directory.
      resultType: 'success'
      absolutePath: string
    }
  | {
      // The caller passed an empty path string.
      resultType: 'emptyPath'
    }
  | {
      // Path missing/inaccessible, or it exists but is not a directory.
      resultType: 'pathNotFound' | 'notADirectory'
      directoryPath: string
      absolutePath: string
    }
  | {
      // Path is already inside an existing working directory.
      resultType: 'alreadyInWorkingDirectory'
      directoryPath: string
      workingDir: string
    }
+
+export async function validateDirectoryForWorkspace(
+  directoryPath: string,
+  permissionContext: ToolPermissionContext,
+): Promise<AddDirectoryResult> {
+  if (!directoryPath) {
+    return {
+      resultType: 'emptyPath',
+    }
+  }
+
+  // resolve() strips the trailing slash expandPath can leave on absolute
+  // inputs, so /foo and /foo/ map to the same storage key (CC-33).
+  const absolutePath = resolve(expandPath(directoryPath))
+
+  // Check if path exists and is a directory (single syscall)
+  try {
+    const stats = await stat(absolutePath)
+    if (!stats.isDirectory()) {
+      return {
+        resultType: 'notADirectory',
+        directoryPath,
+        absolutePath,
+      }
+    }
+  } catch (e: unknown) {
+    const code = getErrnoCode(e)
+    // Match prior existsSync() semantics: treat any of these as "not found"
+    // rather than re-throwing. EACCES/EPERM in particular must not crash
+    // startup when a settings-configured additional directory is inaccessible.
+    if (
+      code === 'ENOENT' ||
+      code === 'ENOTDIR' ||
+      code === 'EACCES' ||
+      code === 'EPERM'
+    ) {
+      return {
+        resultType: 'pathNotFound',
+        directoryPath,
+        absolutePath,
+      }
+    }
+    throw e
+  }
+
+  // Get current permission context
+  const currentWorkingDirs = allWorkingDirectories(permissionContext)
+
+  // Check if already within an existing working directory
+  for (const workingDir of currentWorkingDirs) {
+    if (pathInWorkingPath(absolutePath, workingDir)) {
+      return {
+        resultType: 'alreadyInWorkingDirectory',
+        directoryPath,
+        workingDir,
+      }
+    }
+  }
+
+  return {
+    resultType: 'success',
+    absolutePath,
+  }
+}
+
+export function addDirHelpMessage(result: AddDirectoryResult): string {
+  switch (result.resultType) {
+    case 'emptyPath':
+      return 'Please provide a directory path.'
+    case 'pathNotFound':
+      return `Path ${chalk.bold(result.absolutePath)} was not found.`
+    case 'notADirectory': {
+      const parentDir = dirname(result.absolutePath)
+      return `${chalk.bold(result.directoryPath)} is not a directory. Did you mean to add the parent directory ${chalk.bold(parentDir)}?`
+    }
+    case 'alreadyInWorkingDirectory':
+      return `${chalk.bold(result.directoryPath)} is already accessible within the existing working directory ${chalk.bold(result.workingDir)}.`
+    case 'success':
+      return `Added ${chalk.bold(result.absolutePath)} as a working directory.`
+  }
+}

+ 109 - 0
src/commands/advisor.ts

@@ -0,0 +1,109 @@
+import type { Command } from '../commands.js'
+import type { LocalCommandCall } from '../types/command.js'
+import {
+  canUserConfigureAdvisor,
+  isValidAdvisorModel,
+  modelSupportsAdvisor,
+} from '../utils/advisor.js'
+import {
+  getDefaultMainLoopModelSetting,
+  normalizeModelStringForAPI,
+  parseUserSpecifiedModel,
+} from '../utils/model/model.js'
+import { validateModel } from '../utils/model/validateModel.js'
+import { updateSettingsForSource } from '../utils/settings/settings.js'
+
// Implements /advisor: show the current advisor model (no args), unset it
// ('unset'/'off'), or validate and set a new one (any other arg).
const call: LocalCommandCall = async (args, context) => {
  const arg = args.trim().toLowerCase()
  // The base (main-loop) model determines whether an advisor is active.
  const baseModel = parseUserSpecifiedModel(
    context.getAppState().mainLoopModel ?? getDefaultMainLoopModelSetting(),
  )

  // No argument: report current advisor status.
  if (!arg) {
    const current = context.getAppState().advisorModel
    if (!current) {
      return {
        type: 'text',
        value:
          'Advisor: not set\nUse "/advisor <model>" to enable (e.g. "/advisor opus").',
      }
    }
    if (!modelSupportsAdvisor(baseModel)) {
      return {
        type: 'text',
        value: `Advisor: ${current} (inactive)\nThe current model (${baseModel}) does not support advisors.`,
      }
    }
    return {
      type: 'text',
      value: `Advisor: ${current}\nUse "/advisor unset" to disable or "/advisor <model>" to change.`,
    }
  }

  // 'unset'/'off': clear both in-memory state and the persisted setting.
  if (arg === 'unset' || arg === 'off') {
    const prev = context.getAppState().advisorModel
    context.setAppState(s => {
      // Return the same state object when nothing changed (no-op update).
      if (s.advisorModel === undefined) return s
      return { ...s, advisorModel: undefined }
    })
    updateSettingsForSource('userSettings', { advisorModel: undefined })
    return {
      type: 'text',
      value: prev
        ? `Advisor disabled (was ${prev}).`
        : 'Advisor already unset.',
    }
  }

  // Otherwise treat the argument as a model name: validate before persisting.
  const normalizedModel = normalizeModelStringForAPI(arg)
  const resolvedModel = parseUserSpecifiedModel(arg)
  const { valid, error } = await validateModel(resolvedModel)
  if (!valid) {
    return {
      type: 'text',
      value: error
        ? `Invalid advisor model: ${error}`
        : `Unknown model: ${arg} (${resolvedModel})`,
    }
  }

  if (!isValidAdvisorModel(resolvedModel)) {
    return {
      type: 'text',
      value: `The model ${arg} (${resolvedModel}) cannot be used as an advisor`,
    }
  }

  context.setAppState(s => {
    // Return the same state object when nothing changed (no-op update).
    if (s.advisorModel === normalizedModel) return s
    return { ...s, advisorModel: normalizedModel }
  })
  updateSettingsForSource('userSettings', { advisorModel: normalizedModel })

  // The setting is persisted above even when the current base model can't
  // use it; warn so the user knows to switch models.
  if (!modelSupportsAdvisor(baseModel)) {
    return {
      type: 'text',
      value: `Advisor set to ${normalizedModel}.\nNote: Your current model (${baseModel}) does not support advisors. Switch to a supported model to use the advisor.`,
    }
  }

  return {
    type: 'text',
    value: `Advisor set to ${normalizedModel}.`,
  }
}
+
// The /advisor local command definition.
const advisor = {
  type: 'local',
  name: 'advisor',
  description: 'Configure the advisor model',
  argumentHint: '[<model>|off]',
  isEnabled: () => canUserConfigureAdvisor(),
  // Getter so hidden-ness is re-evaluated each time it is read, mirroring
  // isEnabled.
  get isHidden() {
    return !canUserConfigureAdvisor()
  },
  supportsNonInteractive: true,
  load: () => Promise.resolve({ call }),
} satisfies Command

export default advisor

+ 5 - 0
src/commands/agents-platform/index.ts

@@ -0,0 +1,5 @@
// Stub: ant-only command, not available in this build.
// Consistency fix: the sibling stubs (ant-trace, autofix-pr, …) expose
// `name`, `isEnabled: () => false` and `isHidden: true` so generic command
// loaders reliably skip/hide them; this stub only had `command`/`description`.
// The original keys are preserved for backward compatibility.
const agentsPlatform = {
  command: 'agents-platform',
  name: 'agents-platform',
  description: 'Not available',
  isEnabled: () => false,
  isHidden: true,
};
export default agentsPlatform;

+ 12 - 0
src/commands/agents/agents.tsx

@@ -0,0 +1,12 @@
+import * as React from 'react';
+import { AgentsMenu } from '../../components/agents/AgentsMenu.js';
+import type { ToolUseContext } from '../../Tool.js';
+import { getTools } from '../../tools.js';
+import type { LocalJSXCommandOnDone } from '../../types/command.js';
/** Render the agents management menu; `onDone` is wired to its exit action. */
export async function call(onDone: LocalJSXCommandOnDone, context: ToolUseContext): Promise<React.ReactNode> {
  const { toolPermissionContext } = context.getAppState();
  const availableTools = getTools(toolPermissionContext);
  return <AgentsMenu tools={availableTools} onExit={onDone} />;
}
+//# sourceMappingURL=data:application/json;charset=utf-8;base64,eyJ2ZXJzaW9uIjozLCJuYW1lcyI6WyJSZWFjdCIsIkFnZW50c01lbnUiLCJUb29sVXNlQ29udGV4dCIsImdldFRvb2xzIiwiTG9jYWxKU1hDb21tYW5kT25Eb25lIiwiY2FsbCIsIm9uRG9uZSIsImNvbnRleHQiLCJQcm9taXNlIiwiUmVhY3ROb2RlIiwiYXBwU3RhdGUiLCJnZXRBcHBTdGF0ZSIsInBlcm1pc3Npb25Db250ZXh0IiwidG9vbFBlcm1pc3Npb25Db250ZXh0IiwidG9vbHMiXSwic291cmNlcyI6WyJhZ2VudHMudHN4Il0sInNvdXJjZXNDb250ZW50IjpbImltcG9ydCAqIGFzIFJlYWN0IGZyb20gJ3JlYWN0J1xuaW1wb3J0IHsgQWdlbnRzTWVudSB9IGZyb20gJy4uLy4uL2NvbXBvbmVudHMvYWdlbnRzL0FnZW50c01lbnUuanMnXG5pbXBvcnQgdHlwZSB7IFRvb2xVc2VDb250ZXh0IH0gZnJvbSAnLi4vLi4vVG9vbC5qcydcbmltcG9ydCB7IGdldFRvb2xzIH0gZnJvbSAnLi4vLi4vdG9vbHMuanMnXG5pbXBvcnQgdHlwZSB7IExvY2FsSlNYQ29tbWFuZE9uRG9uZSB9IGZyb20gJy4uLy4uL3R5cGVzL2NvbW1hbmQuanMnXG5cbmV4cG9ydCBhc3luYyBmdW5jdGlvbiBjYWxsKFxuICBvbkRvbmU6IExvY2FsSlNYQ29tbWFuZE9uRG9uZSxcbiAgY29udGV4dDogVG9vbFVzZUNvbnRleHQsXG4pOiBQcm9taXNlPFJlYWN0LlJlYWN0Tm9kZT4ge1xuICBjb25zdCBhcHBTdGF0ZSA9IGNvbnRleHQuZ2V0QXBwU3RhdGUoKVxuICBjb25zdCBwZXJtaXNzaW9uQ29udGV4dCA9IGFwcFN0YXRlLnRvb2xQZXJtaXNzaW9uQ29udGV4dFxuICBjb25zdCB0b29scyA9IGdldFRvb2xzKHBlcm1pc3Npb25Db250ZXh0KVxuXG4gIHJldHVybiA8QWdlbnRzTWVudSB0b29scz17dG9vbHN9IG9uRXhpdD17b25Eb25lfSAvPlxufVxuIl0sIm1hcHBpbmdzIjoiQUFBQSxPQUFPLEtBQUtBLEtBQUssTUFBTSxPQUFPO0FBQzlCLFNBQVNDLFVBQVUsUUFBUSx1Q0FBdUM7QUFDbEUsY0FBY0MsY0FBYyxRQUFRLGVBQWU7QUFDbkQsU0FBU0MsUUFBUSxRQUFRLGdCQUFnQjtBQUN6QyxjQUFjQyxxQkFBcUIsUUFBUSx3QkFBd0I7QUFFbkUsT0FBTyxlQUFlQyxJQUFJQSxDQUN4QkMsTUFBTSxFQUFFRixxQkFBcUIsRUFDN0JHLE9BQU8sRUFBRUwsY0FBYyxDQUN4QixFQUFFTSxPQUFPLENBQUNSLEtBQUssQ0FBQ1MsU0FBUyxDQUFDLENBQUM7RUFDMUIsTUFBTUMsUUFBUSxHQUFHSCxPQUFPLENBQUNJLFdBQVcsQ0FBQyxDQUFDO0VBQ3RDLE1BQU1DLGlCQUFpQixHQUFHRixRQUFRLENBQUNHLHFCQUFxQjtFQUN4RCxNQUFNQyxLQUFLLEdBQUdYLFFBQVEsQ0FBQ1MsaUJBQWlCLENBQUM7RUFFekMsT0FBTyxDQUFDLFVBQVUsQ0FBQyxLQUFLLENBQUMsQ0FBQ0UsS0FBSyxDQUFDLENBQUMsTUFBTSxDQUFDLENBQUNSLE1BQU0sQ0FBQyxHQUFHO0FBQ3JEIiwiaWdub3JlTGlzdCI6W119

+ 10 - 0
src/commands/agents/index.ts

@@ -0,0 +1,10 @@
+import type { Command } from '../../commands.js'
+
// /agents — local-jsx command; the React menu implementation is lazy-loaded
// from agents.js so the command registry stays light at startup.
const agents = {
  type: 'local-jsx',
  name: 'agents',
  description: 'Manage agent configurations',
  load: () => import('./agents.js'),
} satisfies Command

export default agents

+ 1 - 0
src/commands/ant-trace/index.js

@@ -0,0 +1 @@
// Disabled placeholder for the ant-trace command: hidden and never enabled.
const stub = {
  name: 'stub',
  isHidden: true,
  isEnabled: () => false,
};
export default stub;

+ 8 - 0
src/commands/assistant/assistant.tsx

@@ -0,0 +1,8 @@
+// Stub: not included in leak
+import React from 'react';
// Stub: the real install wizard is not included in this build; renders nothing.
export function NewInstallWizard(_props: any): React.ReactElement | null {
  return null;
}
// Stub: with no wizard there is no install directory to compute; always ''.
export async function computeDefaultInstallDir(): Promise<string> {
  return '';
}

+ 1 - 0
src/commands/autofix-pr/index.js

@@ -0,0 +1 @@
// Disabled placeholder for the autofix-pr command: hidden and never enabled.
const stub = {
  name: 'stub',
  isHidden: true,
  isEnabled: () => false,
};
export default stub;

+ 1 - 0
src/commands/backfill-sessions/index.js

@@ -0,0 +1 @@
// Disabled placeholder for the backfill-sessions command: hidden, never enabled.
const stub = {
  name: 'stub',
  isHidden: true,
  isEnabled: () => false,
};
export default stub;

+ 296 - 0
src/commands/branch/branch.ts

@@ -0,0 +1,296 @@
+import { randomUUID, type UUID } from 'crypto'
+import { mkdir, readFile, writeFile } from 'fs/promises'
+import { getOriginalCwd, getSessionId } from '../../bootstrap/state.js'
+import type { LocalJSXCommandContext } from '../../commands.js'
+import { logEvent } from '../../services/analytics/index.js'
+import type { LocalJSXCommandOnDone } from '../../types/command.js'
+import type {
+  ContentReplacementEntry,
+  Entry,
+  LogOption,
+  SerializedMessage,
+  TranscriptMessage,
+} from '../../types/logs.js'
+import { parseJSONL } from '../../utils/json.js'
+import {
+  getProjectDir,
+  getTranscriptPath,
+  getTranscriptPathForSession,
+  isTranscriptMessage,
+  saveCustomTitle,
+  searchSessionsByCustomTitle,
+} from '../../utils/sessionStorage.js'
+import { jsonStringify } from '../../utils/slowOperations.js'
+import { escapeRegExp } from '../../utils/stringUtils.js'
+
// Transcript message plus fork provenance: which session/message this entry
// was copied from (written by createFork below).
type TranscriptEntry = TranscriptMessage & {
  forkedFrom?: {
    sessionId: string
    messageUuid: UUID
  }
}
+
+/**
+ * Derive a single-line title base from the first user message.
+ * Collapses whitespace — multiline first messages (pasted stacks, code)
+ * otherwise flow into the saved title and break the resume hint.
+ */
+export function deriveFirstPrompt(
+  firstUserMessage: Extract<SerializedMessage, { type: 'user' }> | undefined,
+): string {
+  const content = firstUserMessage?.message?.content
+  if (!content) return 'Branched conversation'
+  const raw =
+    typeof content === 'string'
+      ? content
+      : content.find(
+          (block): block is { type: 'text'; text: string } =>
+            block.type === 'text',
+        )?.text
+  if (!raw) return 'Branched conversation'
+  return (
+    raw.replace(/\s+/g, ' ').trim().slice(0, 100) || 'Branched conversation'
+  )
+}
+
+/**
+ * Creates a fork of the current conversation by copying from the transcript file.
+ * Preserves all original metadata (timestamps, gitBranch, etc.) while updating
+ * sessionId and adding forkedFrom traceability.
+ */
+async function createFork(customTitle?: string): Promise<{
+  sessionId: UUID
+  title: string | undefined
+  forkPath: string
+  serializedMessages: SerializedMessage[]
+  contentReplacementRecords: ContentReplacementEntry['replacements']
+}> {
+  const forkSessionId = randomUUID() as UUID
+  const originalSessionId = getSessionId()
+  const projectDir = getProjectDir(getOriginalCwd())
+  const forkSessionPath = getTranscriptPathForSession(forkSessionId)
+  const currentTranscriptPath = getTranscriptPath()
+
+  // Ensure project directory exists
+  await mkdir(projectDir, { recursive: true, mode: 0o700 })
+
+  // Read current transcript file
+  let transcriptContent: Buffer
+  try {
+    transcriptContent = await readFile(currentTranscriptPath)
+  } catch {
+    throw new Error('No conversation to branch')
+  }
+
+  if (transcriptContent.length === 0) {
+    throw new Error('No conversation to branch')
+  }
+
+  // Parse all transcript entries (messages + metadata entries like content-replacement)
+  const entries = parseJSONL<Entry>(transcriptContent)
+
+  // Filter to only main conversation messages (exclude sidechains and non-message entries)
+  const mainConversationEntries = entries.filter(
+    (entry): entry is TranscriptMessage =>
+      isTranscriptMessage(entry) && !entry.isSidechain,
+  )
+
+  // Content-replacement entries for the original session. These record which
+  // tool_result blocks were replaced with previews by the per-message budget.
+  // Without them in the fork JSONL, `claude -r {forkId}` reconstructs state
+  // with an empty replacements Map → previously-replaced results are classified
+  // as FROZEN and sent as full content (prompt cache miss + permanent overage).
+  // sessionId must be rewritten since loadTranscriptFile keys lookup by the
+  // session's messages' sessionId.
+  const contentReplacementRecords = entries
+    .filter(
+      (entry): entry is ContentReplacementEntry =>
+        entry.type === 'content-replacement' &&
+        entry.sessionId === originalSessionId,
+    )
+    .flatMap(entry => entry.replacements)
+
+  if (mainConversationEntries.length === 0) {
+    throw new Error('No messages to branch')
+  }
+
+  // Build forked entries with new sessionId and preserved metadata
+  let parentUuid: UUID | null = null
+  const lines: string[] = []
+  const serializedMessages: SerializedMessage[] = []
+
+  for (const entry of mainConversationEntries) {
+    // Create forked transcript entry preserving all original metadata
+    const forkedEntry: TranscriptEntry = {
+      ...entry,
+      sessionId: forkSessionId,
+      parentUuid,
+      isSidechain: false,
+      forkedFrom: {
+        sessionId: originalSessionId,
+        messageUuid: entry.uuid,
+      },
+    }
+
+    // Build serialized message for LogOption
+    const serialized: SerializedMessage = {
+      ...entry,
+      sessionId: forkSessionId,
+    }
+
+    serializedMessages.push(serialized)
+    lines.push(jsonStringify(forkedEntry))
+    if (entry.type !== 'progress') {
+      parentUuid = entry.uuid
+    }
+  }
+
+  // Append content-replacement entry (if any) with the fork's sessionId.
+  // Written as a SINGLE entry (same shape as insertContentReplacement) so
+  // loadTranscriptFile's content-replacement branch picks it up.
+  if (contentReplacementRecords.length > 0) {
+    const forkedReplacementEntry: ContentReplacementEntry = {
+      type: 'content-replacement',
+      sessionId: forkSessionId,
+      replacements: contentReplacementRecords,
+    }
+    lines.push(jsonStringify(forkedReplacementEntry))
+  }
+
+  // Write the fork session file
+  await writeFile(forkSessionPath, lines.join('\n') + '\n', {
+    encoding: 'utf8',
+    mode: 0o600,
+  })
+
+  return {
+    sessionId: forkSessionId,
+    title: customTitle,
+    forkPath: forkSessionPath,
+    serializedMessages,
+    contentReplacementRecords,
+  }
+}
+
+/**
+ * Generates a unique fork name by checking for collisions with existing session names.
+ * If "baseName (Branch)" already exists, tries "baseName (Branch 2)", "baseName (Branch 3)", etc.
+ */
+async function getUniqueForkName(baseName: string): Promise<string> {
+  const candidateName = `${baseName} (Branch)`
+
+  // Check if this exact name already exists
+  const existingWithExactName = await searchSessionsByCustomTitle(
+    candidateName,
+    { exact: true },
+  )
+
+  if (existingWithExactName.length === 0) {
+    return candidateName
+  }
+
+  // Name collision - find a unique numbered suffix
+  // Search for all sessions that start with the base pattern
+  const existingForks = await searchSessionsByCustomTitle(`${baseName} (Branch`)
+
+  // Extract existing fork numbers to find the next available
+  const usedNumbers = new Set<number>([1]) // Consider " (Branch)" as number 1
+  const forkNumberPattern = new RegExp(
+    `^${escapeRegExp(baseName)} \\(Branch(?: (\\d+))?\\)$`,
+  )
+
+  for (const session of existingForks) {
+    const match = session.customTitle?.match(forkNumberPattern)
+    if (match) {
+      if (match[1]) {
+        usedNumbers.add(parseInt(match[1], 10))
+      } else {
+        usedNumbers.add(1) // " (Branch)" without number is treated as 1
+      }
+    }
+  }
+
+  // Find the next available number
+  let nextNumber = 2
+  while (usedNumbers.has(nextNumber)) {
+    nextNumber++
+  }
+
+  return `${baseName} (Branch ${nextNumber})`
+}
+
/**
 * /branch entry point: fork the current conversation and switch into the fork.
 *
 * @param onDone - Completion callback; all user-facing feedback flows through it.
 * @param context - Command context; when `context.resume` exists the UI resumes
 *   into the forked session, otherwise a manual /resume hint is emitted.
 * @param args - Raw argument string; trimmed and used as the custom title.
 * @returns Always null — this command renders nothing itself.
 */
// NOTE(review): React is not imported in this file; the React.ReactNode return
// type presumably resolves via a global/ambient declaration — confirm.
export async function call(
  onDone: LocalJSXCommandOnDone,
  context: LocalJSXCommandContext,
  args: string,
): Promise<React.ReactNode> {
  // Empty/whitespace-only args → no custom title (firstPrompt is used instead)
  const customTitle = args?.trim() || undefined

  // Captured before resuming so the hint still points at the pre-fork session
  const originalSessionId = getSessionId()

  try {
    const {
      sessionId,
      title,
      forkPath,
      serializedMessages,
      contentReplacementRecords,
    } = await createFork(customTitle)

    // Build LogOption for resume
    const now = new Date()
    const firstPrompt = deriveFirstPrompt(
      serializedMessages.find(m => m.type === 'user'),
    )

    // Save custom title - use provided title or firstPrompt as default
    // This ensures /status and /resume show the same session name
    // Always add " (Branch)" suffix to make it clear this is a branched session
    // Handle collisions by adding a number suffix (e.g., " (Branch 2)", " (Branch 3)")
    const baseName = title ?? firstPrompt
    const effectiveTitle = await getUniqueForkName(baseName)
    await saveCustomTitle(sessionId, effectiveTitle, forkPath)

    logEvent('tengu_conversation_forked', {
      message_count: serializedMessages.length,
      has_custom_title: !!title,
    })

    const forkLog: LogOption = {
      date: now.toISOString().split('T')[0]!,
      messages: serializedMessages,
      fullPath: forkPath,
      value: now.getTime(),
      created: now,
      modified: now,
      firstPrompt,
      messageCount: serializedMessages.length,
      isSidechain: false,
      sessionId,
      customTitle: effectiveTitle,
      contentReplacements: contentReplacementRecords,
    }

    // Resume into the fork
    const titleInfo = title ? ` "${title}"` : ''
    const resumeHint = `\nTo resume the original: claude -r ${originalSessionId}`
    const successMessage = `Branched conversation${titleInfo}. You are now in the branch.${resumeHint}`

    if (context.resume) {
      await context.resume(sessionId, forkLog, 'fork')
      onDone(successMessage, { display: 'system' })
    } else {
      // Fallback if resume not available
      onDone(
        `Branched conversation${titleInfo}. Resume with: /resume ${sessionId}`,
      )
    }

    return null
  } catch (error) {
    const message =
      error instanceof Error ? error.message : 'Unknown error occurred'
    onDone(`Failed to branch conversation: ${message}`)
    return null
  }
}

+ 14 - 0
src/commands/branch/index.ts

@@ -0,0 +1,14 @@
+import { feature } from 'bun:bundle'
+import type { Command } from '../../commands.js'
+
// /branch — fork the current conversation at this point (see branch.ts).
// feature() is a bun:bundle compile-time flag; leave the ternary shape as-is
// so dead-code elimination keeps working.
const branch = {
  type: 'local-jsx',
  name: 'branch',
  // 'fork' alias only when /fork doesn't exist as its own command
  aliases: feature('FORK_SUBAGENT') ? [] : ['fork'],
  description: 'Create a branch of the current conversation at this point',
  argumentHint: '[name]',
  load: () => import('./branch.js'),
} satisfies Command

export default branch

+ 1 - 0
src/commands/break-cache/index.js

@@ -0,0 +1 @@
+export default { isEnabled: () => false, isHidden: true, name: 'stub' };

+ 200 - 0
src/commands/bridge-kick.ts

@@ -0,0 +1,200 @@
+import { getBridgeDebugHandle } from '../bridge/bridgeDebug.js'
+import type { Command } from '../commands.js'
+import type { LocalCommandCall } from '../types/command.js'
+
+/**
+ * Ant-only: inject bridge failure states to manually test recovery paths.
+ *
+ *   /bridge-kick close 1002            — fire ws_closed with code 1002
+ *   /bridge-kick close 1006            — fire ws_closed with code 1006
+ *   /bridge-kick poll 404              — next poll throws 404/not_found_error
+ *   /bridge-kick poll 404 <type>       — next poll throws 404 with error_type
+ *   /bridge-kick poll 401              — next poll throws 401 (auth)
+ *   /bridge-kick poll transient        — next poll throws axios-style rejection
+ *   /bridge-kick register fail         — next register (inside doReconnect) transient-fails
+ *   /bridge-kick register fail 3       — next 3 registers transient-fail
+ *   /bridge-kick register fatal        — next register 403s (terminal)
+ *   /bridge-kick reconnect-session fail — POST /bridge/reconnect fails (→ Strategy 2)
+ *   /bridge-kick heartbeat 401         — next heartbeat 401s (JWT expired)
+ *   /bridge-kick reconnect             — call doReconnect directly (= SIGUSR2)
+ *   /bridge-kick status                — print current bridge state
+ *
+ * Workflow: connect Remote Control, run a subcommand, `tail -f debug.log`
+ * and watch [bridge:repl] / [bridge:debug] lines for the recovery reaction.
+ *
+ * Composite sequences — the failure modes in the BQ data are chains, not
+ * single events. Queue faults then fire the trigger:
+ *
+ *   # #22148 residual: ws_closed → register transient-blips → teardown?
+ *   /bridge-kick register fail 2
+ *   /bridge-kick close 1002
+ *   → expect: doReconnect tries register, fails, returns false → teardown
+ *     (demonstrates the retry gap that needs fixing)
+ *
+ *   # Dead gate: poll 404/not_found_error → does onEnvironmentLost fire?
+ *   /bridge-kick poll 404
+ *   → expect: tengu_bridge_repl_fatal_error (gate is dead — 147K/wk)
+ *     after fix: tengu_bridge_repl_env_lost → doReconnect
+ */
+
// Printed verbatim for missing/unknown subcommands; keep in sync with the
// switch in `call` below.
const USAGE = `/bridge-kick <subcommand>
  close <code>              fire ws_closed with the given code (e.g. 1002)
  poll <status> [type]      next poll throws BridgeFatalError(status, type)
  poll transient            next poll throws axios-style rejection (5xx/net)
  register fail [N]         next N registers transient-fail (default 1)
  register fatal            next register 403s (terminal)
  reconnect-session fail    next POST /bridge/reconnect fails
  heartbeat <status>        next heartbeat throws BridgeFatalError(status)
  reconnect                 call reconnectEnvironmentWithSession directly
  status                    print bridge state`
+
/**
 * Dispatch one /bridge-kick subcommand against the live bridge debug handle.
 * Either queues a fault (injectFault) to be hit on the next matching call,
 * fires a transport event immediately, or reports state; always returns a
 * text result. Unknown/malformed input falls through to USAGE.
 */
const call: LocalCommandCall = async args => {
  const h = getBridgeDebugHandle()
  if (!h) {
    return {
      type: 'text',
      value:
        'No bridge debug handle registered. Remote Control must be connected (USER_TYPE=ant).',
    }
  }

  // sub = subcommand, a/b = positional arguments (both optional)
  const [sub, a, b] = args.trim().split(/\s+/)

  switch (sub) {
    case 'close': {
      const code = Number(a)
      if (!Number.isFinite(code)) {
        return { type: 'text', value: `close: need a numeric code\n${USAGE}` }
      }
      h.fireClose(code)
      return {
        type: 'text',
        value: `Fired transport close(${code}). Watch debug.log for [bridge:repl] recovery.`,
      }
    }

    case 'poll': {
      if (a === 'transient') {
        h.injectFault({
          method: 'pollForWork',
          kind: 'transient',
          status: 503,
          count: 1,
        })
        h.wakePollLoop()
        return {
          type: 'text',
          value:
            'Next poll will throw a transient (axios rejection). Poll loop woken.',
        }
      }
      const status = Number(a)
      if (!Number.isFinite(status)) {
        return {
          type: 'text',
          value: `poll: need 'transient' or a status code\n${USAGE}`,
        }
      }
      // Default to what the server ACTUALLY sends for 404 (BQ-verified),
      // so `/bridge-kick poll 404` reproduces the real 147K/week state.
      const errorType =
        b ?? (status === 404 ? 'not_found_error' : 'authentication_error')
      h.injectFault({
        method: 'pollForWork',
        kind: 'fatal',
        status,
        errorType,
        count: 1,
      })
      h.wakePollLoop()
      return {
        type: 'text',
        value: `Next poll will throw BridgeFatalError(${status}, ${errorType}). Poll loop woken.`,
      }
    }

    case 'register': {
      if (a === 'fatal') {
        h.injectFault({
          method: 'registerBridgeEnvironment',
          kind: 'fatal',
          status: 403,
          errorType: 'permission_error',
          count: 1,
        })
        return {
          type: 'text',
          value:
            'Next registerBridgeEnvironment will 403. Trigger with close/reconnect.',
        }
      }
      // `register fail [N]` — N defaults to 1 (and non-numeric N falls back to 1)
      const n = Number(b) || 1
      h.injectFault({
        method: 'registerBridgeEnvironment',
        kind: 'transient',
        status: 503,
        count: n,
      })
      return {
        type: 'text',
        value: `Next ${n} registerBridgeEnvironment call(s) will transient-fail. Trigger with close/reconnect.`,
      }
    }

    case 'reconnect-session': {
      h.injectFault({
        method: 'reconnectSession',
        kind: 'fatal',
        status: 404,
        errorType: 'not_found_error',
        count: 2,
      })
      return {
        type: 'text',
        value:
          'Next 2 POST /bridge/reconnect calls will 404. doReconnect Strategy 1 falls through to Strategy 2.',
      }
    }

    case 'heartbeat': {
      // NaN (and 0) fall back to 401 — the JWT-expired case from the header doc
      const status = Number(a) || 401
      h.injectFault({
        method: 'heartbeatWork',
        kind: 'fatal',
        status,
        errorType: status === 401 ? 'authentication_error' : 'not_found_error',
        count: 1,
      })
      return {
        type: 'text',
        value: `Next heartbeat will ${status}. Watch for onHeartbeatFatal → work-state teardown.`,
      }
    }

    case 'reconnect': {
      h.forceReconnect()
      return {
        type: 'text',
        value: 'Called reconnectEnvironmentWithSession(). Watch debug.log.',
      }
    }

    case 'status': {
      return { type: 'text', value: h.describe() }
    }

    default:
      return { type: 'text', value: USAGE }
  }
}
+
// Ant-only (USER_TYPE=ant) and interactive-only: fault injection is useless
// without a live session whose recovery can be watched in debug.log.
const bridgeKick = {
  type: 'local',
  name: 'bridge-kick',
  description: 'Inject bridge failure states for manual recovery testing',
  isEnabled: () => process.env.USER_TYPE === 'ant',
  supportsNonInteractive: false,
  load: () => Promise.resolve({ call }),
} satisfies Command

export default bridgeKick

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 508 - 0
src/commands/bridge/bridge.tsx


+ 26 - 0
src/commands/bridge/index.ts

@@ -0,0 +1,26 @@
+import { feature } from 'bun:bundle'
+import { isBridgeEnabled } from '../../bridge/bridgeEnabled.js'
+import type { Command } from '../../commands.js'
+
+function isEnabled(): boolean {
+  if (!feature('BRIDGE_MODE')) {
+    return false
+  }
+  return isBridgeEnabled()
+}
+
// /remote-control (alias /rc) — immediate local-jsx command. Visibility
// tracks isEnabled() at read time via the isHidden getter.
const bridge = {
  type: 'local-jsx',
  name: 'remote-control',
  aliases: ['rc'],
  description: 'Connect this terminal for remote-control sessions',
  argumentHint: '[name]',
  isEnabled,
  get isHidden() {
    return !isEnabled()
  },
  immediate: true,
  load: () => import('./bridge.js'),
} satisfies Command

export default bridge

+ 130 - 0
src/commands/brief.ts

@@ -0,0 +1,130 @@
+import { feature } from 'bun:bundle'
+import { z } from 'zod/v4'
+import { getKairosActive, setUserMsgOptIn } from '../bootstrap/state.js'
+import { getFeatureValue_CACHED_MAY_BE_STALE } from '../services/analytics/growthbook.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../services/analytics/index.js'
+import type { ToolUseContext } from '../Tool.js'
+import { isBriefEntitled } from '../tools/BriefTool/BriefTool.js'
+import { BRIEF_TOOL_NAME } from '../tools/BriefTool/prompt.js'
+import type {
+  Command,
+  LocalJSXCommandContext,
+  LocalJSXCommandOnDone,
+} from '../types/command.js'
+import { lazySchema } from '../utils/lazySchema.js'
+
+// Zod guards against fat-fingered GB pushes (same pattern as pollConfig.ts /
+// cronScheduler.ts). A malformed config falls back to DEFAULT_BRIEF_CONFIG
+// entirely rather than being partially trusted.
const briefConfigSchema = lazySchema(() =>
  z.object({
    // Controls whether /brief appears in the slash-command list at all.
    enable_slash_command: z.boolean(),
  }),
)
type BriefConfig = z.infer<ReturnType<typeof briefConfigSchema>>

// Safe default: slash command stays hidden unless GB explicitly enables it.
const DEFAULT_BRIEF_CONFIG: BriefConfig = {
  enable_slash_command: false,
}
+
+// No TTL — this gate controls slash-command *visibility*, not a kill switch.
+// CACHED_MAY_BE_STALE still has one background-update flip (first call kicks
+// off fetch; second call sees fresh value), but no additional flips after that.
+// The tool-availability gate (tengu_kairos_brief in isBriefEnabled) keeps its
+// 5-min TTL because that one IS a kill switch.
+function getBriefConfig(): BriefConfig {
+  const raw = getFeatureValue_CACHED_MAY_BE_STALE<unknown>(
+    'tengu_kairos_brief_config',
+    DEFAULT_BRIEF_CONFIG,
+  )
+  const parsed = briefConfigSchema().safeParse(raw)
+  return parsed.success ? parsed.data : DEFAULT_BRIEF_CONFIG
+}
+
/**
 * /brief — toggles brief-only output mode for the session.
 * Slash-command visibility is gated by the tengu_kairos_brief_config GB flag
 * (via getBriefConfig); turning the mode ON is additionally gated by
 * isBriefEntitled(). Turning it OFF is never gated.
 */
const brief = {
  type: 'local-jsx',
  name: 'brief',
  description: 'Toggle brief-only mode',
  isEnabled: () => {
    if (feature('KAIROS') || feature('KAIROS_BRIEF')) {
      return getBriefConfig().enable_slash_command
    }
    return false
  },
  immediate: true,
  load: () =>
    Promise.resolve({
      async call(
        onDone: LocalJSXCommandOnDone,
        context: ToolUseContext & LocalJSXCommandContext,
      ): Promise<React.ReactNode> {
        const current = context.getAppState().isBriefOnly
        const newState = !current

        // Entitlement check only gates the on-transition — off is always
        // allowed so a user whose GB gate flipped mid-session isn't stuck.
        if (newState && !isBriefEntitled()) {
          logEvent('tengu_brief_mode_toggled', {
            enabled: false,
            gated: true,
            source:
              'slash_command' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
          })
          onDone('Brief tool is not enabled for your account', {
            display: 'system',
          })
          return null
        }

        // Two-way: userMsgOptIn tracks isBriefOnly so the tool is available
        // exactly when brief mode is on. This invalidates prompt cache on
        // each toggle (tool list changes), but a stale tool list is worse —
        // when /brief is enabled mid-session the model was previously left
        // without the tool, emitting plain text the filter hides.
        setUserMsgOptIn(newState)

        context.setAppState(prev => {
          if (prev.isBriefOnly === newState) return prev
          return { ...prev, isBriefOnly: newState }
        })

        logEvent('tengu_brief_mode_toggled', {
          enabled: newState,
          gated: false,
          source:
            'slash_command' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        })

        // The tool list change alone isn't a strong enough signal mid-session
        // (model may keep emitting plain text from inertia, or keep calling a
        // tool that just vanished). Inject an explicit reminder into the next
        // turn's context so the transition is unambiguous.
        // Skip when Kairos is active: isBriefEnabled() short-circuits on
        // getKairosActive() so the tool never actually leaves the list, and
        // the Kairos system prompt already mandates SendUserMessage.
        // Inline <system-reminder> wrap — importing wrapInSystemReminder from
        // utils/messages.ts pulls constants/xml.ts into the bridge SDK bundle
        // via this module's import chain, tripping the excluded-strings check.
        const metaMessages = getKairosActive()
          ? undefined
          : [
              `<system-reminder>\n${
                newState
                  ? `Brief mode is now enabled. Use the ${BRIEF_TOOL_NAME} tool for all user-facing output — plain text outside it is hidden from the user's view.`
                  : `Brief mode is now disabled. The ${BRIEF_TOOL_NAME} tool is no longer available — reply with plain text.`
              }\n</system-reminder>`,
            ]

        onDone(
          newState ? 'Brief-only mode enabled' : 'Brief-only mode disabled',
          { display: 'system', metaMessages },
        )
        return null
      },
    }),
} satisfies Command

export default brief

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 242 - 0
src/commands/btw/btw.tsx


+ 13 - 0
src/commands/btw/index.ts

@@ -0,0 +1,13 @@
+import type { Command } from '../../commands.js'
+
+const btw = {
+  type: 'local-jsx',
+  name: 'btw',
+  description:
+    'Ask a quick side question without interrupting the main conversation',
+  immediate: true,
+  argumentHint: '<question>',
+  load: () => import('./btw.js'),
+} satisfies Command
+
+export default btw

+ 1 - 0
src/commands/bughunter/index.js

@@ -0,0 +1 @@
// Disabled placeholder for the bughunter command: hidden and never enabled.
const stub = {
  name: 'stub',
  isHidden: true,
  isEnabled: () => false,
};
export default stub;

파일 크기가 너무 크기때문에 변경 상태를 표시하지 않습니다.
+ 284 - 0
src/commands/chrome/chrome.tsx


+ 13 - 0
src/commands/chrome/index.ts

@@ -0,0 +1,13 @@
+import { getIsNonInteractiveSession } from '../../bootstrap/state.js'
+import type { Command } from '../../commands.js'
+
+const command: Command = {
+  name: 'chrome',
+  description: 'Claude in Chrome (Beta) settings',
+  availability: ['claude-ai'],
+  isEnabled: () => !getIsNonInteractiveSession(),
+  type: 'local-jsx',
+  load: () => import('./chrome.js'),
+}
+
+export default command

+ 144 - 0
src/commands/clear/caches.ts

@@ -0,0 +1,144 @@
+/**
+ * Session cache clearing utilities.
+ * This module is imported at startup by main.tsx, so keep imports minimal.
+ */
+import { feature } from 'bun:bundle'
+import {
+  clearInvokedSkills,
+  setLastEmittedDate,
+} from '../../bootstrap/state.js'
+import { clearCommandsCache } from '../../commands.js'
+import { getSessionStartDate } from '../../constants/common.js'
+import {
+  getGitStatus,
+  getSystemContext,
+  getUserContext,
+  setSystemPromptInjection,
+} from '../../context.js'
+import { clearFileSuggestionCaches } from '../../hooks/fileSuggestions.js'
+import { clearAllPendingCallbacks } from '../../hooks/useSwarmPermissionPoller.js'
+import { clearAllDumpState } from '../../services/api/dumpPrompts.js'
+import { resetPromptCacheBreakDetection } from '../../services/api/promptCacheBreakDetection.js'
+import { clearAllSessions } from '../../services/api/sessionIngress.js'
+import { runPostCompactCleanup } from '../../services/compact/postCompactCleanup.js'
+import { resetAllLSPDiagnosticState } from '../../services/lsp/LSPDiagnosticRegistry.js'
+import { clearTrackedMagicDocs } from '../../services/MagicDocs/magicDocs.js'
+import { clearDynamicSkills } from '../../skills/loadSkillsDir.js'
+import { resetSentSkillNames } from '../../utils/attachments.js'
+import { clearCommandPrefixCaches } from '../../utils/bash/commands.js'
+import { resetGetMemoryFilesCache } from '../../utils/claudemd.js'
+import { clearRepositoryCaches } from '../../utils/detectRepository.js'
+import { clearResolveGitDirCache } from '../../utils/git/gitFilesystem.js'
+import { clearStoredImagePaths } from '../../utils/imageStore.js'
+import { clearSessionEnvVars } from '../../utils/sessionEnvVars.js'
+
+/**
+ * Clear all session-related caches.
+ * Call this when resuming a session to ensure fresh file/skill discovery.
+ * This is a subset of what clearConversation does - it only clears caches
+ * without affecting messages, session ID, or triggering hooks.
+ *
+ * @param preservedAgentIds - Agent IDs whose per-agent state should survive
+ *   the clear (e.g., background tasks preserved across /clear). When non-empty,
+ *   agentId-keyed state (invoked skills) is selectively cleared and requestId-keyed
+ *   state (pending permission callbacks, dump state, cache-break tracking) is left
+ *   intact since it cannot be safely scoped to the main session.
+ */
export function clearSessionCaches(
  preservedAgentIds: ReadonlySet<string> = new Set(),
): void {
  // When any agent state must survive the clear, requestId-keyed state that
  // cannot be scoped to the main session is left intact (guards below).
  const hasPreserved = preservedAgentIds.size > 0
  // Clear context caches
  getUserContext.cache.clear?.()
  getSystemContext.cache.clear?.()
  getGitStatus.cache.clear?.()
  getSessionStartDate.cache.clear?.()
  // Clear file suggestion caches (for @ mentions)
  clearFileSuggestionCaches()

  // Clear commands/skills cache
  clearCommandsCache()

  // Clear prompt cache break detection state
  if (!hasPreserved) resetPromptCacheBreakDetection()

  // Clear system prompt injection (cache breaker)
  setSystemPromptInjection(null)

  // Clear last emitted date so it's re-detected on next turn
  setLastEmittedDate(null)

  // Run post-compaction cleanup (clears system prompt sections, microcompact tracking,
  // classifier approvals, speculative checks, and — for main-thread compacts — memory
  // files cache with load_reason 'compact').
  runPostCompactCleanup()
  // Reset sent skill names so the skill listing is re-sent after /clear.
  // runPostCompactCleanup intentionally does NOT reset this (post-compact
  // re-injection costs ~4K tokens), but /clear wipes messages entirely so
  // the model needs the full listing again.
  resetSentSkillNames()
  // Override the memory cache reset with 'session_start': clearSessionCaches is called
  // from /clear and --resume/--continue, which are NOT compaction events. Without this,
  // the InstructionsLoaded hook would fire with load_reason 'compact' instead of
  // 'session_start' on the next getMemoryFiles() call.
  resetGetMemoryFilesCache('session_start')

  // Clear stored image paths cache
  clearStoredImagePaths()

  // Clear all session ingress caches (lastUuidMap, sequentialAppendBySession)
  clearAllSessions()
  // Clear swarm permission pending callbacks
  if (!hasPreserved) clearAllPendingCallbacks()

  // Clear tungsten session usage tracking (ant-only; lazy import keeps the
  // module out of non-ant startup paths)
  if (process.env.USER_TYPE === 'ant') {
    void import('../../tools/TungstenTool/TungstenTool.js').then(
      ({ clearSessionsWithTungstenUsage, resetInitializationState }) => {
        clearSessionsWithTungstenUsage()
        resetInitializationState()
      },
    )
  }
  // Clear attribution caches (file content cache, pending bash states)
  // Dynamic import to preserve dead code elimination for COMMIT_ATTRIBUTION feature flag
  if (feature('COMMIT_ATTRIBUTION')) {
    void import('../../utils/attributionHooks.js').then(
      ({ clearAttributionCaches }) => clearAttributionCaches(),
    )
  }
  // Clear repository detection caches
  clearRepositoryCaches()
  // Clear bash command prefix caches (Haiku-extracted prefixes)
  clearCommandPrefixCaches()
  // Clear dump prompts state
  if (!hasPreserved) clearAllDumpState()
  // Clear invoked skills cache (each entry holds full skill file content);
  // entries for preservedAgentIds survive
  clearInvokedSkills(preservedAgentIds)
  // Clear git dir resolution cache
  clearResolveGitDirCache()
  // Clear dynamic skills (loaded from skill directories)
  clearDynamicSkills()
  // Clear LSP diagnostic tracking state
  resetAllLSPDiagnosticState()
  // Clear tracked magic docs
  clearTrackedMagicDocs()
  // Clear session environment variables
  clearSessionEnvVars()
  // Clear WebFetch URL cache (up to 50MB of cached page content)
  void import('../../tools/WebFetchTool/utils.js').then(
    ({ clearWebFetchCache }) => clearWebFetchCache(),
  )
  // Clear ToolSearch description cache (full tool prompts, ~500KB for 50 MCP tools)
  void import('../../tools/ToolSearchTool/ToolSearchTool.js').then(
    ({ clearToolSearchDescriptionCache }) => clearToolSearchDescriptionCache(),
  )
  // Clear agent definitions cache (accumulates per-cwd via EnterWorktreeTool)
  void import('../../tools/AgentTool/loadAgentsDir.js').then(
    ({ clearAgentDefinitionsCache }) => clearAgentDefinitionsCache(),
  )
  // Clear SkillTool prompt cache (accumulates per project root)
  void import('../../tools/SkillTool/prompt.js').then(({ clearPromptCache }) =>
    clearPromptCache(),
  )
}

+ 7 - 0
src/commands/clear/clear.ts

@@ -0,0 +1,7 @@
+import type { LocalCommandCall } from '../../types/command.js'
+import { clearConversation } from './conversation.js'
+
+/**
+ * /clear slash-command entry point.
+ *
+ * Delegates the actual work to clearConversation (lazy-loaded heavy module,
+ * see ./conversation.js) and returns an empty text result, so the command
+ * itself renders no output of its own.
+ */
+export const call: LocalCommandCall = async (_, context) => {
+  await clearConversation(context)
+  return { type: 'text', value: '' }
+}

+ 251 - 0
src/commands/clear/conversation.ts

@@ -0,0 +1,251 @@
+/**
+ * Conversation clearing utility.
+ * This module has heavier dependencies and should be lazy-loaded when possible.
+ */
+import { feature } from 'bun:bundle'
+import { randomUUID, type UUID } from 'crypto'
+import {
+  getLastMainRequestId,
+  getOriginalCwd,
+  getSessionId,
+  regenerateSessionId,
+} from '../../bootstrap/state.js'
+import {
+  type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+  logEvent,
+} from '../../services/analytics/index.js'
+import type { AppState } from '../../state/AppState.js'
+import { isInProcessTeammateTask } from '../../tasks/InProcessTeammateTask/types.js'
+import {
+  isLocalAgentTask,
+  type LocalAgentTaskState,
+} from '../../tasks/LocalAgentTask/LocalAgentTask.js'
+import { isLocalShellTask } from '../../tasks/LocalShellTask/guards.js'
+import { asAgentId } from '../../types/ids.js'
+import type { Message } from '../../types/message.js'
+import { createEmptyAttributionState } from '../../utils/commitAttribution.js'
+import type { FileStateCache } from '../../utils/fileStateCache.js'
+import {
+  executeSessionEndHooks,
+  getSessionEndHookTimeoutMs,
+} from '../../utils/hooks.js'
+import { logError } from '../../utils/log.js'
+import { clearAllPlanSlugs } from '../../utils/plans.js'
+import { setCwd } from '../../utils/Shell.js'
+import { processSessionStartHooks } from '../../utils/sessionStart.js'
+import {
+  clearSessionMetadata,
+  getAgentTranscriptPath,
+  resetSessionFilePointer,
+  saveWorktreeState,
+} from '../../utils/sessionStorage.js'
+import {
+  evictTaskOutput,
+  initTaskOutputAsSymlink,
+} from '../../utils/task/diskOutput.js'
+import { getCurrentWorktreeSession } from '../../utils/worktree.js'
+import { clearSessionCaches } from './caches.js'
+
+/**
+ * Clears the current conversation and re-initializes session state in-process.
+ *
+ * Sequence (order matters): run SessionEnd hooks (time-bounded), emit a cache
+ * eviction hint, compute which background tasks to preserve, empty the message
+ * list, wipe session-scoped caches and AppState (killing only foreground
+ * tasks), regenerate the session ID, re-point preserved agents' transcript
+ * symlinks, re-persist mode/worktree state, then run SessionStart hooks.
+ *
+ * @param setMessages - Updater used to empty (and later repopulate with hook
+ *   results) the conversation message list.
+ * @param readFileState - Per-session file content cache; cleared in place.
+ * @param discoveredSkillNames - Optional set of discovered skill names; cleared.
+ * @param loadedNestedMemoryPaths - Optional set of loaded nested memory file
+ *   paths; cleared.
+ * @param getAppState - Optional AppState getter; used to partition tasks into
+ *   preserved (backgrounded) vs. killed (foreground).
+ * @param setAppState - Optional AppState setter; used to drop killed tasks and
+ *   reset attribution, file history, and MCP state.
+ * @param setConversationId - Optional setter given a fresh UUID to force a
+ *   logo re-render.
+ */
+export async function clearConversation({
+  setMessages,
+  readFileState,
+  discoveredSkillNames,
+  loadedNestedMemoryPaths,
+  getAppState,
+  setAppState,
+  setConversationId,
+}: {
+  setMessages: (updater: (prev: Message[]) => Message[]) => void
+  readFileState: FileStateCache
+  discoveredSkillNames?: Set<string>
+  loadedNestedMemoryPaths?: Set<string>
+  getAppState?: () => AppState
+  setAppState?: (f: (prev: AppState) => AppState) => void
+  setConversationId?: (id: UUID) => void
+}): Promise<void> {
+  // Execute SessionEnd hooks before clearing (bounded by
+  // CLAUDE_CODE_SESSIONEND_HOOKS_TIMEOUT_MS, default 1.5s)
+  const sessionEndTimeoutMs = getSessionEndHookTimeoutMs()
+  await executeSessionEndHooks('clear', {
+    getAppState,
+    setAppState,
+    signal: AbortSignal.timeout(sessionEndTimeoutMs),
+    timeoutMs: sessionEndTimeoutMs,
+  })
+
+  // Signal to inference that this conversation's cache can be evicted.
+  const lastRequestId = getLastMainRequestId()
+  if (lastRequestId) {
+    logEvent('tengu_cache_eviction_hint', {
+      scope:
+        'conversation_clear' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+      last_request_id:
+        lastRequestId as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
+    })
+  }
+
+  // Compute preserved tasks up front so their per-agent state survives the
+  // cache wipe below. A task is preserved unless it explicitly has
+  // isBackgrounded === false. Main-session tasks (Ctrl+B) are preserved —
+  // they write to an isolated per-task transcript and run under an agent
+  // context, so they're safe across session ID regeneration. See
+  // LocalMainSessionTask.ts startBackgroundSession.
+  const preservedAgentIds = new Set<string>()
+  const preservedLocalAgents: LocalAgentTaskState[] = []
+  const shouldKillTask = (task: AppState['tasks'][string]): boolean =>
+    'isBackgrounded' in task && task.isBackgrounded === false
+  if (getAppState) {
+    for (const task of Object.values(getAppState().tasks)) {
+      if (shouldKillTask(task)) continue
+      if (isLocalAgentTask(task)) {
+        preservedAgentIds.add(task.agentId)
+        preservedLocalAgents.push(task)
+      } else if (isInProcessTeammateTask(task)) {
+        preservedAgentIds.add(task.identity.agentId)
+      }
+    }
+  }
+
+  // Drop all conversation messages immediately.
+  setMessages(() => [])
+
+  // Clear context-blocked flag so proactive ticks resume after /clear
+  if (feature('PROACTIVE') || feature('KAIROS')) {
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { setContextBlocked } = require('../../proactive/index.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    setContextBlocked(false)
+  }
+
+  // Force logo re-render by updating conversationId
+  if (setConversationId) {
+    setConversationId(randomUUID())
+  }
+
+  // Clear all session-related caches. Per-agent state for preserved background
+  // tasks (invoked skills, pending permission callbacks, dump state, cache-break
+  // tracking) is retained so those agents keep functioning.
+  clearSessionCaches(preservedAgentIds)
+
+  setCwd(getOriginalCwd())
+  readFileState.clear()
+  discoveredSkillNames?.clear()
+  loadedNestedMemoryPaths?.clear()
+
+  // Clean out necessary items from App State
+  if (setAppState) {
+    setAppState(prev => {
+      // Partition tasks using the same predicate computed above:
+      // kill+remove foreground tasks, preserve everything else.
+      const nextTasks: AppState['tasks'] = {}
+      for (const [taskId, task] of Object.entries(prev.tasks)) {
+        if (!shouldKillTask(task)) {
+          nextTasks[taskId] = task
+          continue
+        }
+        // Foreground task: kill it and drop from state
+        try {
+          if (task.status === 'running') {
+            if (isLocalShellTask(task)) {
+              task.shellCommand?.kill()
+              task.shellCommand?.cleanup()
+              if (task.cleanupTimeoutId) {
+                clearTimeout(task.cleanupTimeoutId)
+              }
+            }
+            if ('abortController' in task) {
+              task.abortController?.abort()
+            }
+            if ('unregisterCleanup' in task) {
+              task.unregisterCleanup?.()
+            }
+          }
+        } catch (error) {
+          logError(error)
+        }
+        void evictTaskOutput(taskId)
+      }
+
+      return {
+        ...prev,
+        tasks: nextTasks,
+        attribution: createEmptyAttributionState(),
+        // Clear standalone agent context (name/color set by /rename, /color)
+        // so the new session doesn't display the old session's identity badge
+        standaloneAgentContext: undefined,
+        fileHistory: {
+          snapshots: [],
+          trackedFiles: new Set(),
+          snapshotSequence: 0,
+        },
+        // Reset MCP state to default to trigger re-initialization.
+        // Preserve pluginReconnectKey so /clear doesn't cause a no-op
+        // (it's only bumped by /reload-plugins).
+        mcp: {
+          clients: [],
+          tools: [],
+          commands: [],
+          resources: {},
+          pluginReconnectKey: prev.mcp.pluginReconnectKey,
+        },
+      }
+    })
+  }
+
+  // Clear plan slug cache so a new plan file is used after /clear
+  clearAllPlanSlugs()
+
+  // Clear cached session metadata (title, tag, agent name/color)
+  // so the new session doesn't inherit the previous session's identity
+  clearSessionMetadata()
+
+  // Generate new session ID to provide fresh state
+  // Set the old session as parent for analytics lineage tracking
+  regenerateSessionId({ setCurrentAsParent: true })
+  // Update the environment variable so subprocesses use the new session ID
+  if (process.env.USER_TYPE === 'ant' && process.env.CLAUDE_CODE_SESSION_ID) {
+    process.env.CLAUDE_CODE_SESSION_ID = getSessionId()
+  }
+  await resetSessionFilePointer()
+
+  // Preserved local_agent tasks had their TaskOutput symlink baked against the
+  // old session ID at spawn time, but post-clear transcript writes land under
+  // the new session directory (appendEntry re-reads getSessionId()). Re-point
+  // the symlinks so TaskOutput reads the live file instead of a frozen pre-clear
+  // snapshot. Only re-point running tasks — finished tasks will never write
+  // again, so re-pointing would replace a valid symlink with a dangling one.
+  // Main-session tasks use the same per-agent path (they write via
+  // recordSidechainTranscript to getAgentTranscriptPath), so no special case.
+  for (const task of preservedLocalAgents) {
+    if (task.status !== 'running') continue
+    void initTaskOutputAsSymlink(
+      task.id,
+      getAgentTranscriptPath(asAgentId(task.agentId)),
+    )
+  }
+
+  // Re-persist mode and worktree state after the clear so future --resume
+  // knows what the new post-clear session was in. clearSessionMetadata
+  // wiped both from the cache, but the process is still in the same mode
+  // and (if applicable) the same worktree directory.
+  if (feature('COORDINATOR_MODE')) {
+    /* eslint-disable @typescript-eslint/no-require-imports */
+    const { saveMode } = require('../../utils/sessionStorage.js')
+    const {
+      isCoordinatorMode,
+    } = require('../../coordinator/coordinatorMode.js')
+    /* eslint-enable @typescript-eslint/no-require-imports */
+    saveMode(isCoordinatorMode() ? 'coordinator' : 'normal')
+  }
+  const worktreeSession = getCurrentWorktreeSession()
+  if (worktreeSession) {
+    saveWorktreeState(worktreeSession)
+  }
+
+  // Execute SessionStart hooks after clearing
+  const hookMessages = await processSessionStartHooks('clear')
+
+  // Update messages with hook results
+  if (hookMessages.length > 0) {
+    setMessages(() => hookMessages)
+  }
+}

이 변경점에서 너무 많은 파일들이 변경되어 몇몇 파일들은 표시되지 않았습니다.