fix: docs placement

This commit is contained in:
nate 2026-04-08 13:28:46 +04:00
parent b8f502fdb3
commit 2336f86aa3
5 changed files with 39 additions and 36 deletions

View File

@ -72,8 +72,10 @@ export const ingest = new Elysia()
const scheduledAt = body.scheduled_at ? new Date(body.scheduled_at) : null;
const jitterMs = body.jitter_ms ?? null;
// Per-region transition state. Empty string = unspecified/single-region.
const region = body.region ?? '';
// Per-region transition state. Region is always populated by current runners;
// a missing or empty region from older pings collapses to "default" so state and
// notifications never carry an empty label.
const region = body.region && body.region.length > 0 ? body.region : 'default';
const [stateRow] = await sql`
SELECT last_state, consecutive_down, cert_alert_sent
FROM monitor_region_state
@ -135,7 +137,7 @@ export const ingest = new Elysia()
${important},
${body.error ?? null},
${Object.keys(meta).length > 0 ? sql.json(meta) : null},
${body.region ?? null},
${region},
${body.run_id ?? null}
)
RETURNING *

View File

@ -23,9 +23,12 @@ async fn main() -> Result<()> {
.unwrap_or_else(|_| "http://localhost:3000".into());
let monitor_token = env::var("MONITOR_TOKEN")
.expect("MONITOR_TOKEN must be set");
let region = env::var("REGION").unwrap_or_default();
// Region label this runner reports on every ping. "default" means the operator
// didn't pin this runner to a named region — it's still a meaningful label so
// alerts say where they came from instead of being blank.
let region = env::var("REGION").ok().filter(|s| !s.is_empty()).unwrap_or_else(|| "default".to_string());
info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {}", if region.is_empty() { "all" } else { &region });
info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {region}");
let client = reqwest::Client::builder()
.user_agent("PingQL-Monitor/0.1")

View File

@ -25,11 +25,7 @@ pub async fn fetch_and_run(
region: &str,
in_flight: &Arc<Mutex<HashSet<String>>>,
) -> Result<usize> {
let url = if region.is_empty() {
format!("{coordinator_url}/internal/due?lookahead_ms=2000")
} else {
format!("{coordinator_url}/internal/due?region={}&lookahead_ms=2000", region)
};
let url = format!("{coordinator_url}/internal/due?region={region}&lookahead_ms=2000");
let monitors: Vec<Monitor> = client
.get(&url)
.header("x-monitor-token", token)
@ -103,7 +99,7 @@ pub async fn fetch_and_run(
error: Some(format!("timed out after {}ms", timeout_ms)),
cert_expiry_days: None,
meta: None,
region: if region_owned.is_empty() { None } else { Some(region_owned.to_string()) },
region: Some(region_owned.to_string()),
run_id: Some(run_id_owned.clone()),
},
};
@ -197,7 +193,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
error: Some(e.clone()),
cert_expiry_days: None,
meta: None,
region: if region.is_empty() { None } else { Some(region.to_string()) },
region: Some(region.to_string()),
run_id: Some(run_id.to_string()),
}
},
@ -262,7 +258,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
error: query_error,
cert_expiry_days,
meta: Some(meta),
region: if region.is_empty() { None } else { Some(region.to_string()) },
region: Some(region.to_string()),
run_id: Some(run_id.to_string()),
}
}

View File

@ -52,12 +52,14 @@ export function planTier(plan: string): number {
// Hex colour associated with each region key.
// "default" is the label a runner reports when it is not pinned to a named
// region; it uses the same grey as "__none__".
// NOTE(review): "__none__" looks like a placeholder key for pings that carry no
// region at all — confirm against the callers that index this map.
export const REGION_COLORS: Record<string, string> = {
"eu-central": "#3b82f6",
"us-west": "#f59e0b",
"default": "#6b7280",
"__none__": "#6b7280",
};
// Human-readable name for each region key.
// "default" is the label a runner reports when it is not pinned to a named region.
export const REGION_LABELS: Record<string, string> = {
"eu-central": "EU Central",
"us-west": "US West",
"default": "Default",
};
export const REGIONS: [string, string][] = [

View File

@ -53,11 +53,11 @@
<div class="nav-section">Getting Started</div>
<a href="#overview" class="nav-link">Overview</a>
<a href="#auth" class="nav-link">Authentication</a>
<a href="#reliability" class="nav-link">Reliability &amp; noise</a>
<div class="nav-section">API Reference</div>
<a href="#account" class="nav-link">Account</a>
<a href="#monitors" class="nav-link">Monitors</a>
<a href="#reliability" class="nav-link">Reliability</a>
<a href="#notifications" class="nav-link">Notifications</a>
<a href="#webhook-payload" class="nav-link">Webhook payload</a>
@ -92,6 +92,27 @@
<p>Create an account at <a href="/dashboard">/dashboard</a> or via the API. Keys are 64-character hex strings (256-bit). Shown once at registration &#8212; store them securely.</p>
</div>
<!-- Reliability -->
<div id="reliability" class="section">
<h2>Reliability &amp; alert noise</h2>
<p>PingQL doesn't immediately fire on a single failed check. A few knobs let you tune how reactive vs. how stable the alerting is. These are concepts you'll see referenced throughout the API reference below.</p>
<h3>Retries before DOWN</h3>
<p>If a check fails and <code>max_retries</code> is greater than zero, the runner waits <code>retry_interval_s</code> seconds and retries up to that many times <em>before</em> recording a DOWN result. A successful retry posts a single UP ping with <code>meta.retries</code> noting how many attempts it took. This kills almost all flapping caused by transient TCP resets, brief 5xx blips, or network jitter.</p>
<h3>Important beats &amp; transitions</h3>
<p>Every check is recorded, but the <code>important</code> flag on a ping is only set when the monitor's state changes (UP↔DOWN) <em>for that region</em>, or on a resend tick during a long outage (see <em>Resend interval</em> below). Notifications fire on important beats only — never on every routine check. State is tracked independently per region: if <code>us-west</code> goes DOWN, only a subsequent <code>us-west</code> UP clears it. <code>eu-central</code> being healthy will not silence a <code>us-west</code> outage.</p>
<h3>Resend interval</h3>
<p>For long outages, set <code>resend_interval</code> to re-fire the notification every Nth consecutive DOWN beat. With <code>resend_interval: 10</code>, a still-broken monitor produces an extra alert every 10 down checks. <code>0</code> (the default) means: alert once on the transition, then stay quiet until recovery.</p>
<h3>Cert expiry alerting</h3>
<p>For HTTPS monitors PingQL extracts the TLS leaf certificate's days-until-expiry on every check. When that drops at or below <code>cert_alert_days</code> for the first time, a separate <code>cert</code> notification fires (one per region). The flag clears when the cert is renewed, so each renewal cycle gets exactly one alert. Set <code>cert_alert_days: 0</code> to disable.</p>
<h3>Default empty query</h3>
<p>If you don't supply a <code>query</code>, the monitor is considered up only on a <strong style="color:#4ade80">2xx</strong> response. Redirects (3xx), client errors (4xx) and server errors (5xx) all count as DOWN. Use the QL if you want different behaviour.</p>
</div>
<!-- Account -->
<div id="account" class="section">
<h2>Account</h2>
@ -186,27 +207,6 @@
<p class="endpoint-desc">Returns recent ping results for a monitor. Max 1000. Each ping carries an <code>important</code> boolean — true on status transitions and resend ticks (the beats that triggered notifications).</p>
</div>
<!-- Reliability -->
<div id="reliability" class="section">
<h2>Reliability &amp; alert noise</h2>
<p>PingQL doesn't immediately fire on a single failed check. Three knobs let you tune how reactive vs. how stable the alerting is:</p>
<h3>Retries before DOWN</h3>
<p>If a check fails and <code>max_retries</code> is greater than zero, the runner waits <code>retry_interval_s</code> seconds and retries up to that many times <em>before</em> recording a DOWN result. A successful retry posts a single UP ping with <code>meta.retries</code> noting how many attempts it took. This kills almost all flapping caused by transient TCP resets, brief 5xx blips, or network jitter.</p>
<h3>Important beats &amp; transitions</h3>
<p>Every check is recorded, but the <code>important</code> flag on a ping is only set when the monitor's state changes (UP↔DOWN) <em>for that region</em>. Notifications fire on important beats only — never on every routine check. State is tracked independently per region: if <code>us-west</code> goes DOWN, only a subsequent <code>us-west</code> UP clears it. <code>eu-central</code> being healthy will not silence a <code>us-west</code> outage.</p>
<h3>Resend interval</h3>
<p>For long outages, set <code>resend_interval</code> to re-fire the notification every Nth consecutive DOWN beat. With <code>resend_interval: 10</code>, a still-broken monitor produces an extra alert every 10 down checks. <code>0</code> (the default) means: alert once on the transition, then stay quiet until recovery.</p>
<h3>Cert expiry alerting</h3>
<p>For HTTPS monitors PingQL extracts the TLS leaf certificate's days-until-expiry on every check. When that drops at or below <code>cert_alert_days</code> for the first time, a separate <code>cert</code> notification fires (one per region). The flag clears when the cert is renewed, so each renewal cycle gets exactly one alert. Set <code>cert_alert_days: 0</code> to disable.</p>
<h3>Default empty query</h3>
<p>If you don't supply a <code>query</code>, the monitor is considered up only on a <strong style="color:#4ade80">2xx</strong> response. Redirects (3xx), client errors (4xx) and server errors (5xx) all count as DOWN. Use the QL if you want different behaviour.</p>
</div>
<!-- Notifications -->
<div id="notifications" class="section">
<h2>Notifications</h2>
@ -300,7 +300,7 @@ Content-Type: application/json
<span class="k">"id"</span>: <span class="s">"abc123def456"</span>,
<span class="k">"name"</span>: <span class="s">"My API"</span>,
<span class="k">"url"</span>: <span class="s">"https://api.example.com/health"</span>,
<span class="k">"region"</span>: <span class="s">"us-west"</span> <span class="c">// "" for unspecified/single-region monitors</span>
<span class="k">"region"</span>: <span class="s">"us-west"</span> <span class="c">// always present — runners report "default" if the REGION env var is unset or empty</span>
},
<span class="k">"ping"</span>: {
<span class="k">"status_code"</span>: <span class="n">503</span>,