pollConfig.ts 4.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110
  1. import { z } from 'zod/v4'
  2. import { getFeatureValue_CACHED_WITH_REFRESH } from '../services/analytics/growthbook.js'
  3. import { lazySchema } from '../utils/lazySchema.js'
  4. import {
  5. DEFAULT_POLL_CONFIG,
  6. type PollIntervalConfig,
  7. } from './pollConfigDefaults.js'
  8. // .min(100) on the seek-work intervals restores the old Math.max(..., 100)
  9. // defense-in-depth floor against fat-fingered GrowthBook values. Unlike a
  10. // clamp, Zod rejects the whole object on violation — a config with one bad
  11. // field falls back to DEFAULT_POLL_CONFIG entirely rather than being
  12. // partially trusted.
  13. //
  14. // The at_capacity intervals use a 0-or-≥100 refinement: 0 means "disabled"
  15. // (heartbeat-only mode), ≥100 is the fat-finger floor. Values 1–99 are
  16. // rejected so unit confusion (ops thinks seconds, enters 10) doesn't poll
  17. // every 10ms against the VerifyEnvironmentSecretAuth DB path.
  18. //
  19. // The object-level refines require at least one at-capacity liveness
  20. // mechanism enabled: heartbeat OR the relevant poll interval. Without this,
  21. // the hb=0, atCapMs=0 drift config (ops disables heartbeat without
  22. // restoring at_capacity) falls through every throttle site with no sleep —
  23. // tight-looping /poll at HTTP-round-trip speed.
  // Error params handed to the `.refine()` checks on the at-capacity poll
  // intervals, whose predicates accept only 0 or values of at least 100.
  const zeroOrAtLeast100 = { message: 'must be 0 (disabled) or ≥100ms' }
  // Lazily-built Zod schema for the bridge poll interval config. All field
  // builders live inside the callback so nothing touches `z` until the schema
  // is first requested.
  const pollIntervalConfigSchema = lazySchema(() => {
    // Seek-work cadence: integer milliseconds with a hard 100ms floor.
    const seekWorkIntervalMs = () => z.number().int().min(100)

    // At-capacity cadence: integer milliseconds where 0 turns polling off and
    // anything else must be at least 100 (1–99 and negatives are rejected).
    const atCapacityIntervalMs = () =>
      z
        .number()
        .int()
        .refine(ms => ms === 0 || ms >= 100, zeroOrAtLeast100)

    const fields = z.object({
      poll_interval_ms_not_at_capacity: seekWorkIntervalMs(),

      // 0 = no at-capacity polling. Independent of heartbeat — both may be
      // enabled (heartbeat runs and periodically breaks out to poll).
      poll_interval_ms_at_capacity: atCapacityIntervalMs(),

      // 0 = disabled; a positive value heartbeats at that interval while at
      // capacity, alongside at-capacity polling rather than replacing it.
      // The non_exclusive name distinguishes it from the old
      // heartbeat_interval_ms (either-or semantics in pre-#22145 clients).
      // Defaults to 0 so existing GrowthBook configs without the field
      // still parse.
      non_exclusive_heartbeat_interval_ms: z.number().int().min(0).default(0),

      // Multisession (bridgeMain.ts) intervals. Each defaults to the matching
      // DEFAULT_POLL_CONFIG value so configs that omit them keep their
      // current behavior.
      multisession_poll_interval_ms_not_at_capacity:
        seekWorkIntervalMs().default(
          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_not_at_capacity,
        ),
      multisession_poll_interval_ms_partial_capacity:
        seekWorkIntervalMs().default(
          DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_partial_capacity,
        ),
      multisession_poll_interval_ms_at_capacity: atCapacityIntervalMs().default(
        DEFAULT_POLL_CONFIG.multisession_poll_interval_ms_at_capacity,
      ),

      // .min(1) matches the server's ge=1 constraint (work_v1.py:230).
      reclaim_older_than_ms: z.number().int().min(1).default(5000),

      session_keepalive_interval_v2_ms: z
        .number()
        .int()
        .min(0)
        .default(120_000),
    })

    // Object-level guards: each at-capacity path needs a liveness mechanism,
    // either the heartbeat or that path's own poll interval.
    return fields
      .refine(
        ({
          non_exclusive_heartbeat_interval_ms: heartbeatMs,
          poll_interval_ms_at_capacity: atCapacityMs,
        }) => heartbeatMs > 0 || atCapacityMs > 0,
        {
          message:
            'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or poll_interval_ms_at_capacity > 0',
        },
      )
      .refine(
        ({
          non_exclusive_heartbeat_interval_ms: heartbeatMs,
          multisession_poll_interval_ms_at_capacity: atCapacityMs,
        }) => heartbeatMs > 0 || atCapacityMs > 0,
        {
          message:
            'at-capacity liveness requires non_exclusive_heartbeat_interval_ms > 0 or multisession_poll_interval_ms_at_capacity > 0',
        },
      )
  })
  /**
   * Resolve the bridge poll interval config from the
   * `tengu_bridge_poll_interval_config` GrowthBook flag, using a 5-minute
   * refresh window and DEFAULT_POLL_CONFIG as the flag's fallback value.
   *
   * The served value is validated against pollIntervalConfigSchema. When
   * validation succeeds the parsed config (with schema defaults filled in
   * for omitted optional fields) is returned; when it fails, the whole
   * value is discarded in favor of DEFAULT_POLL_CONFIG.
   *
   * Shared by bridgeMain.ts (standalone) and replBridge.ts (REPL) so ops
   * can tune both poll rates fleet-wide with a single config push.
   *
   * @returns The validated config, or DEFAULT_POLL_CONFIG on validation failure.
   */
  export function getPollIntervalConfig(): PollIntervalConfig {
    const refreshWindowMs = 5 * 60 * 1000
    const served = getFeatureValue_CACHED_WITH_REFRESH<unknown>(
      'tengu_bridge_poll_interval_config',
      DEFAULT_POLL_CONFIG,
      refreshWindowMs,
    )

    const result = pollIntervalConfigSchema().safeParse(served)
    if (!result.success) {
      // One bad field invalidates the whole object — never partially trust it.
      return DEFAULT_POLL_CONFIG
    }
    return result.data
  }