fix: docs placement
This commit is contained in:
parent
b8f502fdb3
commit
2336f86aa3
|
|
@ -72,8 +72,10 @@ export const ingest = new Elysia()
|
|||
const scheduledAt = body.scheduled_at ? new Date(body.scheduled_at) : null;
|
||||
const jitterMs = body.jitter_ms ?? null;
|
||||
|
||||
// Per-region transition state. Empty string = unspecified/single-region.
|
||||
const region = body.region ?? '';
|
||||
// Per-region transition state. Region is always populated by current runners;
|
||||
// legacy null values from older pings collapse to "default" so state and
|
||||
// notifications never carry an empty label.
|
||||
const region = body.region && body.region.length > 0 ? body.region : 'default';
|
||||
const [stateRow] = await sql`
|
||||
SELECT last_state, consecutive_down, cert_alert_sent
|
||||
FROM monitor_region_state
|
||||
|
|
@ -135,7 +137,7 @@ export const ingest = new Elysia()
|
|||
${important},
|
||||
${body.error ?? null},
|
||||
${Object.keys(meta).length > 0 ? sql.json(meta) : null},
|
||||
${body.region ?? null},
|
||||
${region},
|
||||
${body.run_id ?? null}
|
||||
)
|
||||
RETURNING *
|
||||
|
|
|
|||
|
|
@ -23,9 +23,12 @@ async fn main() -> Result<()> {
|
|||
.unwrap_or_else(|_| "http://localhost:3000".into());
|
||||
let monitor_token = env::var("MONITOR_TOKEN")
|
||||
.expect("MONITOR_TOKEN must be set");
|
||||
let region = env::var("REGION").unwrap_or_default();
|
||||
// Region label this runner reports on every ping. "default" means the operator
|
||||
// didn't pin this runner to a named region — it's still a meaningful label so
|
||||
// alerts say where they came from instead of being blank.
|
||||
let region = env::var("REGION").ok().filter(|s| !s.is_empty()).unwrap_or_else(|| "default".to_string());
|
||||
|
||||
info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {}", if region.is_empty() { "all" } else { &region });
|
||||
info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {region}");
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent("PingQL-Monitor/0.1")
|
||||
|
|
|
|||
|
|
@ -25,11 +25,7 @@ pub async fn fetch_and_run(
|
|||
region: &str,
|
||||
in_flight: &Arc<Mutex<HashSet<String>>>,
|
||||
) -> Result<usize> {
|
||||
let url = if region.is_empty() {
|
||||
format!("{coordinator_url}/internal/due?lookahead_ms=2000")
|
||||
} else {
|
||||
format!("{coordinator_url}/internal/due?region={}&lookahead_ms=2000", region)
|
||||
};
|
||||
let url = format!("{coordinator_url}/internal/due?region={region}&lookahead_ms=2000");
|
||||
let monitors: Vec<Monitor> = client
|
||||
.get(&url)
|
||||
.header("x-monitor-token", token)
|
||||
|
|
@ -103,7 +99,7 @@ pub async fn fetch_and_run(
|
|||
error: Some(format!("timed out after {}ms", timeout_ms)),
|
||||
cert_expiry_days: None,
|
||||
meta: None,
|
||||
region: if region_owned.is_empty() { None } else { Some(region_owned.to_string()) },
|
||||
region: Some(region_owned.to_string()),
|
||||
run_id: Some(run_id_owned.clone()),
|
||||
},
|
||||
};
|
||||
|
|
@ -197,7 +193,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
|
|||
error: Some(e.clone()),
|
||||
cert_expiry_days: None,
|
||||
meta: None,
|
||||
region: if region.is_empty() { None } else { Some(region.to_string()) },
|
||||
region: Some(region.to_string()),
|
||||
run_id: Some(run_id.to_string()),
|
||||
}
|
||||
},
|
||||
|
|
@ -262,7 +258,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
|
|||
error: query_error,
|
||||
cert_expiry_days,
|
||||
meta: Some(meta),
|
||||
region: if region.is_empty() { None } else { Some(region.to_string()) },
|
||||
region: Some(region.to_string()),
|
||||
run_id: Some(run_id.to_string()),
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,12 +52,14 @@ export function planTier(plan: string): number {
|
|||
export const REGION_COLORS: Record<string, string> = {
|
||||
"eu-central": "#3b82f6",
|
||||
"us-west": "#f59e0b",
|
||||
"default": "#6b7280",
|
||||
"__none__": "#6b7280",
|
||||
};
|
||||
|
||||
export const REGION_LABELS: Record<string, string> = {
|
||||
"eu-central": "EU Central",
|
||||
"us-west": "US West",
|
||||
"default": "Default",
|
||||
};
|
||||
|
||||
export const REGIONS: [string, string][] = [
|
||||
|
|
|
|||
|
|
@ -53,11 +53,11 @@
|
|||
<div class="nav-section">Getting Started</div>
|
||||
<a href="#overview" class="nav-link">Overview</a>
|
||||
<a href="#auth" class="nav-link">Authentication</a>
|
||||
<a href="#reliability" class="nav-link">Reliability & noise</a>
|
||||
|
||||
<div class="nav-section">API Reference</div>
|
||||
<a href="#account" class="nav-link">Account</a>
|
||||
<a href="#monitors" class="nav-link">Monitors</a>
|
||||
<a href="#reliability" class="nav-link">Reliability</a>
|
||||
<a href="#notifications" class="nav-link">Notifications</a>
|
||||
<a href="#webhook-payload" class="nav-link">Webhook payload</a>
|
||||
|
||||
|
|
@ -92,6 +92,27 @@
|
|||
<p>Create an account at <a href="/dashboard">/dashboard</a> or via the API. Keys are 64-character hex strings (256-bit). Shown once at registration — store them securely.</p>
|
||||
</div>
|
||||
|
||||
<!-- Reliability -->
|
||||
<div id="reliability" class="section">
|
||||
<h2>Reliability & alert noise</h2>
|
||||
<p>PingQL doesn't immediately fire on a single failed check. A few knobs let you tune how reactive vs. how stable the alerting is. These are concepts you'll see referenced throughout the API reference below.</p>
|
||||
|
||||
<h3>Retries before DOWN</h3>
|
||||
<p>If a check fails and <code>max_retries</code> is greater than zero, the runner waits <code>retry_interval_s</code> seconds and retries up to that many times <em>before</em> recording a DOWN result. A successful retry posts a single UP ping with <code>meta.retries</code> noting how many attempts it took. This kills almost all flapping caused by transient TCP resets, brief 5xx blips, or network jitter.</p>
|
||||
|
||||
<h3>Important beats & transitions</h3>
|
||||
<p>Every check is recorded, but the <code>important</code> flag on a ping is only set when the monitor's state changes (UP↔DOWN) <em>for that region</em>. Notifications fire on important beats only — never on every routine check. State is tracked independently per region: if <code>us-west</code> goes DOWN, only a subsequent <code>us-west</code> UP clears it. <code>eu-central</code> being healthy will not silence a <code>us-west</code> outage.</p>
|
||||
|
||||
<h3>Resend interval</h3>
|
||||
<p>For long outages, set <code>resend_interval</code> to re-fire the notification every Nth consecutive DOWN beat. With <code>resend_interval: 10</code>, a still-broken monitor produces an extra alert every 10 down checks. <code>0</code> (the default) means: alert once on the transition, then stay quiet until recovery.</p>
|
||||
|
||||
<h3>Cert expiry alerting</h3>
|
||||
<p>For HTTPS monitors PingQL extracts the TLS leaf certificate's days-until-expiry on every check. When that drops to or below <code>cert_alert_days</code> for the first time, a separate <code>cert</code> notification fires (one per region). The flag clears when the cert is renewed, so each renewal cycle gets exactly one alert. Set <code>cert_alert_days: 0</code> to disable.</p>
|
||||
|
||||
<h3>Default empty query</h3>
|
||||
<p>If you don't supply a <code>query</code>, the monitor is considered up only on a <strong style="color:#4ade80">2xx</strong> response. Redirects (3xx), client errors (4xx) and server errors (5xx) all count as DOWN. Use the QL if you want different behaviour.</p>
|
||||
</div>
|
||||
|
||||
<!-- Account -->
|
||||
<div id="account" class="section">
|
||||
<h2>Account</h2>
|
||||
|
|
@ -186,27 +207,6 @@
|
|||
<p class="endpoint-desc">Returns recent ping results for a monitor. Max 1000. Each ping carries an <code>important</code> boolean — true on status transitions and resend ticks (the beats that triggered notifications).</p>
|
||||
</div>
|
||||
|
||||
<!-- Reliability -->
|
||||
<div id="reliability" class="section">
|
||||
<h2>Reliability & alert noise</h2>
|
||||
<p>PingQL doesn't immediately fire on a single failed check. Three knobs let you tune how reactive vs. how stable the alerting is:</p>
|
||||
|
||||
<h3>Retries before DOWN</h3>
|
||||
<p>If a check fails and <code>max_retries</code> is greater than zero, the runner waits <code>retry_interval_s</code> seconds and retries up to that many times <em>before</em> recording a DOWN result. A successful retry posts a single UP ping with <code>meta.retries</code> noting how many attempts it took. This kills almost all flapping caused by transient TCP resets, brief 5xx blips, or network jitter.</p>
|
||||
|
||||
<h3>Important beats & transitions</h3>
|
||||
<p>Every check is recorded, but the <code>important</code> flag on a ping is only set when the monitor's state changes (UP↔DOWN) <em>for that region</em>. Notifications fire on important beats only — never on every routine check. State is tracked independently per region: if <code>us-west</code> goes DOWN, only a subsequent <code>us-west</code> UP clears it. <code>eu-central</code> being healthy will not silence a <code>us-west</code> outage.</p>
|
||||
|
||||
<h3>Resend interval</h3>
|
||||
<p>For long outages, set <code>resend_interval</code> to re-fire the notification every Nth consecutive DOWN beat. With <code>resend_interval: 10</code>, a still-broken monitor produces an extra alert every 10 down checks. <code>0</code> (the default) means: alert once on the transition, then stay quiet until recovery.</p>
|
||||
|
||||
<h3>Cert expiry alerting</h3>
|
||||
<p>For HTTPS monitors PingQL extracts the TLS leaf certificate's days-until-expiry on every check. When that drops at or below <code>cert_alert_days</code> for the first time, a separate <code>cert</code> notification fires (one per region). The flag clears when the cert is renewed, so each renewal cycle gets exactly one alert. Set <code>cert_alert_days: 0</code> to disable.</p>
|
||||
|
||||
<h3>Default empty query</h3>
|
||||
<p>If you don't supply a <code>query</code>, the monitor is considered up only on a <strong style="color:#4ade80">2xx</strong> response. Redirects (3xx), client errors (4xx) and server errors (5xx) all count as DOWN. Use the QL if you want different behaviour.</p>
|
||||
</div>
|
||||
|
||||
<!-- Notifications -->
|
||||
<div id="notifications" class="section">
|
||||
<h2>Notifications</h2>
|
||||
|
|
@ -300,7 +300,7 @@ Content-Type: application/json
|
|||
<span class="k">"id"</span>: <span class="s">"abc123def456"</span>,
|
||||
<span class="k">"name"</span>: <span class="s">"My API"</span>,
|
||||
<span class="k">"url"</span>: <span class="s">"https://api.example.com/health"</span>,
|
||||
<span class="k">"region"</span>: <span class="s">"us-west"</span> <span class="c">// "" for unspecified/single-region monitors</span>
|
||||
<span class="k">"region"</span>: <span class="s">"us-west"</span> <span class="c">// always present — runners default to "default" if REGION env var is unset</span>
|
||||
},
|
||||
<span class="k">"ping"</span>: {
|
||||
<span class="k">"status_code"</span>: <span class="n">503</span>,
|
||||
|
|
|
|||
Loading…
Reference in New Issue