// envLessBridgeConfig.ts
  1. import { z } from 'zod/v4'
  2. import { getFeatureValue_DEPRECATED } from '../services/analytics/growthbook.js'
  3. import { lazySchema } from '../utils/lazySchema.js'
  4. import { lt } from '../utils/semver.js'
  5. import { isEnvLessBridgeEnabled } from './bridgeEnabled.js'
  /**
   * Tunables for the env-less (v2) bridge path, delivered as a single object
   * under the `tengu_bridge_repl_v2_config` feature value.
   */
  export type EnvLessBridgeConfig = {
    // withRetry — init-phase backoff (createSession, POST /bridge, recovery /bridge).
    init_retry_max_attempts: number
    init_retry_base_delay_ms: number
    init_retry_jitter_fraction: number
    init_retry_max_delay_ms: number
    /** axios timeout for POST /sessions, POST /bridge, POST /archive. */
    http_timeout_ms: number
    /** BoundedUUIDSet ring size (echo + re-delivery dedup). */
    uuid_dedup_buffer_size: number
    /** CCRClient worker heartbeat cadence. Server TTL is 60s — 20s gives 3× margin. */
    heartbeat_interval_ms: number
    /** ±fraction of the interval — per-beat jitter to spread fleet load. */
    heartbeat_jitter_fraction: number
    /**
     * Fire the proactive JWT refresh this long before expires_in. A larger
     * buffer means more frequent refresh (refresh cadence ≈ expires_in - buffer).
     */
    token_refresh_buffer_ms: number
    /**
     * Archive POST timeout in teardown(). Distinct from http_timeout_ms because
     * gracefulShutdown races runCleanupFunctions() against a 2s cap — a 10s
     * axios timeout on a slow/stalled archive burns the whole budget on a
     * request that forceExit will kill anyway.
     */
    teardown_archive_timeout_ms: number
    /**
     * Deadline for onConnect after transport.connect(). If neither onConnect
     * nor onClose fires before this, emit tengu_bridge_repl_connect_timeout
     * — the only telemetry for the ~1% of sessions that emit `started` then
     * go silent (no error, no event, just nothing).
     */
    connect_timeout_ms: number
    /**
     * Semver floor for the env-less bridge path. Separate from the v1
     * tengu_bridge_min_version config so a v2-specific bug can force upgrades
     * without blocking v1 (env-based) clients, and vice versa.
     */
    min_version: string
    /**
     * When true, tell users their claude.ai app may be too old to see v2
     * sessions — lets us roll the v2 bridge before the app ships the new
     * session-list query.
     */
    should_show_app_upgrade_message: boolean
  }
  /**
   * Baseline values for the env-less bridge config. Passed to the feature
   * getter as the fallback for `tengu_bridge_repl_v2_config`, and returned
   * wholesale by getEnvLessBridgeConfig() when the remote value fails schema
   * validation.
   */
  export const DEFAULT_ENV_LESS_BRIDGE_CONFIG: EnvLessBridgeConfig = {
    init_retry_max_attempts: 3,
    init_retry_base_delay_ms: 500,
    init_retry_jitter_fraction: 0.25,
    init_retry_max_delay_ms: 4000,
    http_timeout_ms: 10_000,
    uuid_dedup_buffer_size: 2000,
    heartbeat_interval_ms: 20_000,
    heartbeat_jitter_fraction: 0.1,
    token_refresh_buffer_ms: 300_000,
    teardown_archive_timeout_ms: 1500,
    connect_timeout_ms: 15_000,
    min_version: '0.0.0',
    should_show_app_upgrade_message: false,
  }
  // Validation schema for the remote env-less bridge config. Floors and caps
  // reject the whole object on violation (the caller falls back to DEFAULT)
  // rather than partially trusting it — same defense-in-depth as pollConfig.ts.
  // Each field carries a default, so keys absent from the remote payload are
  // filled in instead of failing validation.
  const envLessBridgeConfigSchema = lazySchema(() =>
    z.object({
      init_retry_max_attempts: z.number().int().min(1).max(10).default(3),
      init_retry_base_delay_ms: z.number().int().min(100).default(500),
      init_retry_jitter_fraction: z.number().min(0).max(1).default(0.25),
      init_retry_max_delay_ms: z.number().int().min(500).default(4000),
      http_timeout_ms: z.number().int().min(2000).default(10_000),
      uuid_dedup_buffer_size: z
        .number()
        .int()
        .min(100)
        .max(50_000)
        .default(2000),
      // Server TTL is 60s. Floor 5s prevents thrash; cap 30s keeps ≥2× margin.
      heartbeat_interval_ms: z
        .number()
        .int()
        .min(5000)
        .max(30_000)
        .default(20_000),
      // ±fraction per beat. Cap 0.5: at max interval (30s) × 1.5 = 45s worst
      // case, still under the 60s TTL.
      heartbeat_jitter_fraction: z.number().min(0).max(0.5).default(0.1),
      // Floor 30s prevents tight-looping. Cap 30min rejects buffer-vs-delay
      // semantic inversion: ops entering expires_in-5min (the *delay until
      // refresh*) instead of 5min (the *buffer before expiry*) yields
      // delayMs = expires_in - buffer ≈ 5min instead of ≈4h. Both are positive
      // durations so .min() alone can't distinguish; .max() catches the
      // inverted value since buffer ≥ 30min is nonsensical for a multi-hour JWT.
      token_refresh_buffer_ms: z
        .number()
        .int()
        .min(30_000)
        .max(1_800_000)
        .default(300_000),
      // Cap 2000 keeps this under gracefulShutdown's 2s cleanup race — a higher
      // timeout just lies to axios since forceExit kills the socket regardless.
      teardown_archive_timeout_ms: z
        .number()
        .int()
        .min(500)
        .max(2000)
        .default(1500),
      // Observed p99 connect is ~2-3s; 15s is ~5× headroom. Floor 5s bounds
      // false-positive rate under transient slowness; cap 60s bounds how long
      // a truly-stalled session stays dark.
      connect_timeout_ms: z
        .number()
        .int()
        .min(5_000)
        .max(60_000)
        .default(15_000),
      min_version: z
        .string()
        .refine(v => {
          // The comparison result is discarded: lt() is invoked only to see
          // whether it throws on `v` (presumably on an unparseable version
          // string — confirm against ../utils/semver.js). A throw rejects
          // the value, and with it the whole config object.
          try {
            lt(v, '0.0.0')
            return true
          } catch {
            return false
          }
        })
        .default('0.0.0'),
      should_show_app_upgrade_message: z.boolean().default(false),
    }),
  )
  /**
   * Fetch the env-less bridge timing config from GrowthBook. Read once per
   * initEnvLessBridgeCore call — config is fixed for the lifetime of a bridge
   * session.
   *
   * Uses the blocking getter (not _CACHED_MAY_BE_STALE) because /remote-control
   * runs well after GrowthBook init — initializeGrowthBook() resolves instantly,
   * so there's no startup penalty, and we get the fresh in-memory remoteEval
   * value instead of the stale-on-first-read disk cache. The _DEPRECATED suffix
   * warns against startup-path usage, which this isn't.
   *
   * @returns The schema-validated remote config, or
   *   DEFAULT_ENV_LESS_BRIDGE_CONFIG when the remote value fails validation.
   */
  export async function getEnvLessBridgeConfig(): Promise<EnvLessBridgeConfig> {
    const raw = await getFeatureValue_DEPRECATED<unknown>(
      'tengu_bridge_repl_v2_config',
      DEFAULT_ENV_LESS_BRIDGE_CONFIG,
    )
    // All-or-nothing: a single out-of-range field discards the entire remote
    // payload in favour of the defaults.
    const parsed = envLessBridgeConfigSchema().safeParse(raw)
    return parsed.success ? parsed.data : DEFAULT_ENV_LESS_BRIDGE_CONFIG
  }
  /**
   * Returns an error message if the current CLI version is below the minimum
   * required for the env-less (v2) bridge path, or null if the version is fine.
   *
   * v2 analogue of checkBridgeMinVersion() — reads from tengu_bridge_repl_v2_config
   * instead of tengu_bridge_min_version so the two implementations can enforce
   * independent floors.
   *
   * @returns A user-facing upgrade message, or null when no floor is set or
   *   the running version is not below it.
   */
  export async function checkEnvLessBridgeMinVersion(): Promise<string | null> {
    const cfg = await getEnvLessBridgeConfig()
    // An empty min_version is treated as "no floor" and skips the comparison.
    if (!cfg.min_version || !lt(MACRO.VERSION, cfg.min_version)) {
      return null
    }
    return `Your version of Claude Code (${MACRO.VERSION}) is too old for Remote Control.\nVersion ${cfg.min_version} or higher is required. Run \`claude update\` to update.`
  }
  /**
   * Whether to nudge users toward upgrading their claude.ai app when a
   * Remote Control session starts. True only when the v2 bridge is active
   * AND the should_show_app_upgrade_message config bit is set — lets us
   * roll the v2 bridge before the app ships the new session-list query.
   *
   * @returns false without fetching the config when the env-less bridge is
   *   disabled; otherwise the config's should_show_app_upgrade_message flag.
   */
  export async function shouldShowAppUpgradeMessage(): Promise<boolean> {
    if (!isEnvLessBridgeEnabled()) {
      return false
    }
    const { should_show_app_upgrade_message } = await getEnvLessBridgeConfig()
    return should_show_app_upgrade_message
  }