From 2336f86aa3568994440d331ad679ffa71375026e Mon Sep 17 00:00:00 2001 From: nate Date: Wed, 8 Apr 2026 13:28:46 +0400 Subject: [PATCH] fix: docs placement --- apps/api/src/routes/pings.ts | 8 ++++--- apps/monitor/src/main.rs | 7 ++++-- apps/monitor/src/runner.rs | 12 ++++------ apps/shared/plans.ts | 2 ++ apps/web/src/views/docs.ejs | 46 ++++++++++++++++++------------------ 5 files changed, 39 insertions(+), 36 deletions(-) diff --git a/apps/api/src/routes/pings.ts b/apps/api/src/routes/pings.ts index 81c8f58..da6fa9a 100644 --- a/apps/api/src/routes/pings.ts +++ b/apps/api/src/routes/pings.ts @@ -72,8 +72,10 @@ export const ingest = new Elysia() const scheduledAt = body.scheduled_at ? new Date(body.scheduled_at) : null; const jitterMs = body.jitter_ms ?? null; - // Per-region transition state. Empty string = unspecified/single-region. - const region = body.region ?? ''; + // Per-region transition state. Region is always populated by current runners; + // legacy null values from older pings collapse to "default" so state and + // notifications never carry an empty label. + const region = body.region && body.region.length > 0 ? body.region : 'default'; const [stateRow] = await sql` SELECT last_state, consecutive_down, cert_alert_sent FROM monitor_region_state @@ -135,7 +137,7 @@ export const ingest = new Elysia() ${important}, ${body.error ?? null}, ${Object.keys(meta).length > 0 ? sql.json(meta) : null}, - ${body.region ?? null}, + ${region}, ${body.run_id ?? null} ) RETURNING * diff --git a/apps/monitor/src/main.rs b/apps/monitor/src/main.rs index 61fced9..04a310b 100644 --- a/apps/monitor/src/main.rs +++ b/apps/monitor/src/main.rs @@ -23,9 +23,12 @@ async fn main() -> Result<()> { .unwrap_or_else(|_| "http://localhost:3000".into()); let monitor_token = env::var("MONITOR_TOKEN") .expect("MONITOR_TOKEN must be set"); - let region = env::var("REGION").unwrap_or_default(); + // Region label this runner reports on every ping. 
"default" means the operator + // didn't pin this runner to a named region — it's still a meaningful label so + // alerts say where they came from instead of being blank. + let region = env::var("REGION").ok().filter(|s| !s.is_empty()).unwrap_or_else(|| "default".to_string()); - info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {}", if region.is_empty() { "all" } else { &region }); + info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {region}"); let client = reqwest::Client::builder() .user_agent("PingQL-Monitor/0.1") diff --git a/apps/monitor/src/runner.rs b/apps/monitor/src/runner.rs index 24c9c72..1da8317 100644 --- a/apps/monitor/src/runner.rs +++ b/apps/monitor/src/runner.rs @@ -25,11 +25,7 @@ pub async fn fetch_and_run( region: &str, in_flight: &Arc>>, ) -> Result { - let url = if region.is_empty() { - format!("{coordinator_url}/internal/due?lookahead_ms=2000") - } else { - format!("{coordinator_url}/internal/due?region={}&lookahead_ms=2000", region) - }; + let url = format!("{coordinator_url}/internal/due?region={region}&lookahead_ms=2000"); let monitors: Vec = client .get(&url) .header("x-monitor-token", token) @@ -103,7 +99,7 @@ pub async fn fetch_and_run( error: Some(format!("timed out after {}ms", timeout_ms)), cert_expiry_days: None, meta: None, - region: if region_owned.is_empty() { None } else { Some(region_owned.to_string()) }, + region: Some(region_owned.to_string()), run_id: Some(run_id_owned.clone()), }, }; @@ -197,7 +193,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op error: Some(e.clone()), cert_expiry_days: None, meta: None, - region: if region.is_empty() { None } else { Some(region.to_string()) }, + region: Some(region.to_string()), run_id: Some(run_id.to_string()), } }, @@ -262,7 +258,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op error: query_error, cert_expiry_days, meta: Some(meta), - region: if region.is_empty() { None } 
else { Some(region.to_string()) }, + region: Some(region.to_string()), run_id: Some(run_id.to_string()), } } diff --git a/apps/shared/plans.ts b/apps/shared/plans.ts index a7466d1..9c645d5 100644 --- a/apps/shared/plans.ts +++ b/apps/shared/plans.ts @@ -52,12 +52,14 @@ export function planTier(plan: string): number { export const REGION_COLORS: Record = { "eu-central": "#3b82f6", "us-west": "#f59e0b", + "default": "#6b7280", "__none__": "#6b7280", }; export const REGION_LABELS: Record = { "eu-central": "EU Central", "us-west": "US West", + "default": "Default", }; export const REGIONS: [string, string][] = [ diff --git a/apps/web/src/views/docs.ejs b/apps/web/src/views/docs.ejs index 5f22844..a870187 100644 --- a/apps/web/src/views/docs.ejs +++ b/apps/web/src/views/docs.ejs @@ -53,11 +53,11 @@ Overview Authentication + Reliability & noise Account Monitors - Reliability Notifications Webhook payload @@ -92,6 +92,27 @@

Create an account at /dashboard or via the API. Keys are 64-character hex strings (256-bit). Shown once at registration — store them securely.

+ +
+

Reliability & alert noise

+

PingQL doesn't immediately fire on a single failed check. A few knobs let you tune how reactive vs. how stable the alerting is. These are concepts you'll see referenced throughout the API reference below.

+ +

Retries before DOWN

+

If a check fails and max_retries is greater than zero, the runner waits retry_interval_s seconds between attempts and retries up to max_retries times before recording a DOWN result. A successful retry posts a single UP ping with meta.retries noting how many attempts it took. This kills almost all flapping caused by transient TCP resets, brief 5xx blips, or network jitter.

+ +

Important beats & transitions

+

Every check is recorded, but the important flag on a ping is only set when the monitor's state changes (UP↔DOWN) for that region, or on a resend tick during a long outage. Notifications fire on important beats only — never on every routine check. State is tracked independently per region: if us-west goes DOWN, only a subsequent us-west UP clears it. eu-central being healthy will not silence a us-west outage.

+ +

Resend interval

+

For long outages, set resend_interval to re-fire the notification every Nth consecutive DOWN beat. With resend_interval: 10, a still-broken monitor produces an extra alert every 10 down checks. 0 (the default) means: alert once on the transition, then stay quiet until recovery.

+ +

Cert expiry alerting

+

For HTTPS monitors PingQL extracts the TLS leaf certificate's days-until-expiry on every check. When that drops to or below cert_alert_days for the first time, a separate cert notification fires (one per region). The flag clears when the cert is renewed, so each renewal cycle gets exactly one alert. Set cert_alert_days: 0 to disable.

+ +

Default empty query

+

If you don't supply a query, the monitor is considered up only on a 2xx response. Redirects (3xx), client errors (4xx) and server errors (5xx) all count as DOWN. Use the QL if you want different behaviour.

+
+

Account

@@ -186,27 +207,6 @@

Returns recent ping results for a monitor. Max 1000. Each ping carries an important boolean — true on status transitions and resend ticks (the beats that triggered notifications).

- -
-

Reliability & alert noise

-

PingQL doesn't immediately fire on a single failed check. Three knobs let you tune how reactive vs. how stable the alerting is:

- -

Retries before DOWN

-

If a check fails and max_retries is greater than zero, the runner waits retry_interval_s seconds and retries up to that many times before recording a DOWN result. A successful retry posts a single UP ping with meta.retries noting how many attempts it took. This kills almost all flapping caused by transient TCP resets, brief 5xx blips, or network jitter.

- -

Important beats & transitions

-

Every check is recorded, but the important flag on a ping is only set when the monitor's state changes (UP↔DOWN) for that region. Notifications fire on important beats only — never on every routine check. State is tracked independently per region: if us-west goes DOWN, only a subsequent us-west UP clears it. eu-central being healthy will not silence a us-west outage.

- -

Resend interval

-

For long outages, set resend_interval to re-fire the notification every Nth consecutive DOWN beat. With resend_interval: 10, a still-broken monitor produces an extra alert every 10 down checks. 0 (the default) means: alert once on the transition, then stay quiet until recovery.

- -

Cert expiry alerting

-

For HTTPS monitors PingQL extracts the TLS leaf certificate's days-until-expiry on every check. When that drops at or below cert_alert_days for the first time, a separate cert notification fires (one per region). The flag clears when the cert is renewed, so each renewal cycle gets exactly one alert. Set cert_alert_days: 0 to disable.

- -

Default empty query

-

If you don't supply a query, the monitor is considered up only on a 2xx response. Redirects (3xx), client errors (4xx) and server errors (5xx) all count as DOWN. Use the QL if you want different behaviour.

-
-

Notifications

@@ -300,7 +300,7 @@ Content-Type: application/json "id": "abc123def456", "name": "My API", "url": "https://api.example.com/health", - "region": "us-west" // "" for unspecified/single-region monitors + "region": "us-west" // always present — runners default to "default" if REGION env var is unset }, "ping": { "status_code": 503,