// File: pingql/apps/status/src/data.ts (519 lines, 20 KiB, TypeScript)

// Loads the read-only data needed to render a public status page. Reads come
// from `monitor_region_state` (current state) and `monitor_uptime_rollup`
// (historical uptime windows). The raw `pings` table is only queried as a
// window-bounded fallback when the rollup query returns no rows.
import sql from "./db";
/** Uptime window that can be requested for a status page. */
export type Window =
  | "24h"
  | "7d"
  | "30d"
  | "90d";

/** Rollup granularity; these are the values compared against `bucket_type` in queries. */
export type BucketType =
  | "hourly"
  | "daily"
  | "weekly";
/**
 * Maps each display window to the rollup granularity used to render it and
 * the number of buckets of that granularity that make up the window.
 */
const WINDOW_TO_BUCKET: Record<Window, { bucket: BucketType; count: number }> = {
  "24h": { bucket: "hourly", count: 24 },
  "7d": { bucket: "daily", count: 7 },
  "30d": { bucket: "daily", count: 30 },
  // 13 weekly buckets (91 days) cover the 90-day window.
  "90d": { bucket: "weekly", count: 13 },
};
/** Row shape read from `status_pages` (loaded with `SELECT *` by slug). */
export interface StatusPageRow {
  /** Used as `status_page_id` when loading groups, monitors and incidents. */
  id: string;
  /** Used to scope the incident lookup on the monitor detail view. */
  account_id: string;
  /** Lookup key for the page. */
  slug: string;
  title: string;
  description: string | null;
  theme: "auto" | "light" | "dark";
  /** Never included in the public payload; only its presence is exposed as `has_password`. */
  password_hash: string | null;
  index_search: boolean;
  show_powered_by: boolean;
  show_response_time: boolean;
  show_cert_expiry: boolean;
  /** Window used when the caller does not request one explicitly. */
  default_window: Window;
  /** Page-level default applied to monitors without their own override. */
  display_mode: "compact" | "expanded";
  custom_css: string | null;
  footer_text: string | null;
  og_image_url: string | null;
  analytics_html: string | null;
  /** Presumably an interval in seconds (from the `_s` suffix) — TODO confirm. */
  auto_refresh_s: number;
}
/**
 * Uptime percentage for each fixed window. Values are percentages rounded to
 * two decimals; `null` means there was no data for that window.
 */
export interface MultiWindowUptime {
  /** Last 24 hours. */
  d24: number | null;
  /** Last 7 days. */
  d7: number | null;
  /** Last 30 days. */
  d30: number | null;
  /** Last 90 days. */
  d90: number | null;
}
/** One monitor as rendered on a status page, with state, uptime and latency attached. */
export interface MonitorRow {
  id: string;
  /** Page-specific display name, falling back to the monitor's own name. */
  display_name: string;
  url: string;
  group_id: string | null;
  position: number;
  /** Resolved mode: per-monitor override → page default → 'expanded'. */
  display_mode: "compact" | "expanded";
  /** "down" if any region is down, else "up" if any region is up, else "unknown". */
  current_state: "up" | "down" | "unknown";
  /** Latest known state per region. */
  region_states: Array<{ region: string; state: "up" | "down" | "unknown"; updated_at: string | null }>;
  /** Uptime percentage for the requested window; `null` when there were no checks. */
  uptime_pct: number | null;
  /** Fixed 24h / 7d / 30d / 90d uptime row. */
  uptime: MultiWindowUptime;
  /** Bar chart input: one entry per expected bucket, oldest first; empty slots have `total: 0`. */
  buckets: Array<{ start: string; total: number; up: number }>;
  /** Rounded average latency (ms) of the fastest region over the window. */
  avg_latency: number | null;
  /** Recent hourly latency points per region, used for the sparkline. */
  latency_history: Array<{ region: string; latency_ms: number | null; ts: string }>;
}
/**
 * Average latency of the *fastest* region per monitor over a given window.
 *
 * Status pages are customer-facing, so the best-performing region is shown
 * rather than an average dragged down by a single distant region.
 *
 * @param monitorIds Monitors to report on; an empty list short-circuits to `{}` without querying.
 * @param bucket Rollup granularity to read (`bucket_type`).
 * @param intervalLiteral Postgres interval text (e.g. "7 days") bounding the window.
 * @returns Map of monitor id → rounded latency in ms, or `null` when no latency data was found.
 */
export async function loadFastestRegionLatency(
  monitorIds: string[],
  bucket: BucketType,
  intervalLiteral: string,
): Promise<Record<string, number | null>> {
  const best: Record<string, number | null> = {};
  if (monitorIds.length === 0) return best;

  // Seed every requested monitor so callers always get a key back.
  monitorIds.forEach((monitorId) => {
    best[monitorId] = null;
  });

  const idArray = sql.array(monitorIds);

  // Per (monitor, region) average, weighted by the number of checks in each bucket.
  let perRegion = await sql<any[]>`
SELECT monitor_id, region,
(sum(avg_latency * total) / NULLIF(sum(total), 0))::float AS avg_lat
FROM monitor_uptime_rollup
WHERE monitor_id = ANY(${idArray}::text[])
AND bucket_type = ${bucket}
AND bucket_start > now() - ${intervalLiteral}::interval
AND avg_latency IS NOT NULL
GROUP BY 1, 2
`;

  if (perRegion.length === 0) {
    // Rollup returned nothing for any monitor: aggregate raw pings over the same window instead.
    perRegion = await sql<any[]>`
SELECT monitor_id, COALESCE(region, 'default') AS region,
avg(latency_ms)::float AS avg_lat
FROM pings
WHERE monitor_id = ANY(${idArray}::text[])
AND checked_at > now() - ${intervalLiteral}::interval
AND latency_ms IS NOT NULL
GROUP BY 1, 2
`;
  }

  // Keep the lowest regional average per monitor (compared before rounding).
  for (const row of perRegion) {
    const candidate = row.avg_lat;
    if (candidate == null) continue;
    const known = best[row.monitor_id];
    if (known == null || candidate < known) best[row.monitor_id] = candidate;
  }

  // Round the winners to whole milliseconds.
  for (const [monitorId, value] of Object.entries(best)) {
    if (value != null) best[monitorId] = Math.round(value);
  }
  return best;
}
/**
 * Produces all four uptime windows (24h / 7d / 30d / 90d) for a set of
 * monitors in a single SQL pass over the rollup table. If that query returns
 * no rows at all, the same ratios are computed from `pings`, bounded to 90 days.
 *
 * NOTE(review): the fallback only fires when the rollup has no rows for *any*
 * of the monitors; a monitor missing from a partially populated rollup keeps
 * all-null values. The rollup query also has no lower bound on `bucket_start`
 * in its WHERE clause — confirm that is acceptable for large histories.
 *
 * @param monitorIds Monitors to report on; an empty list short-circuits to `{}` without querying.
 * @returns Map of monitor id → uptime percentages (two decimals), `null` where a window has no data.
 */
export async function loadMultiWindowUptime(monitorIds: string[]): Promise<Record<string, MultiWindowUptime>> {
  const result: Record<string, MultiWindowUptime> = {};
  if (monitorIds.length === 0) return result;

  // Every requested monitor gets an entry, even if no row comes back for it.
  for (const monitorId of monitorIds) {
    result[monitorId] = { d24: null, d7: null, d30: null, d90: null };
  }

  const idArray = sql.array(monitorIds);
  let ratioRows = await sql<any[]>`
SELECT monitor_id,
(sum(up_count) FILTER (WHERE bucket_type='hourly' AND bucket_start > now() - interval '24 hours'))::float
/ NULLIF(sum(total) FILTER (WHERE bucket_type='hourly' AND bucket_start > now() - interval '24 hours'), 0) AS pct_24h,
(sum(up_count) FILTER (WHERE bucket_type='daily' AND bucket_start > now() - interval '7 days'))::float
/ NULLIF(sum(total) FILTER (WHERE bucket_type='daily' AND bucket_start > now() - interval '7 days'), 0) AS pct_7d,
(sum(up_count) FILTER (WHERE bucket_type='daily' AND bucket_start > now() - interval '30 days'))::float
/ NULLIF(sum(total) FILTER (WHERE bucket_type='daily' AND bucket_start > now() - interval '30 days'), 0) AS pct_30d,
(sum(up_count) FILTER (WHERE bucket_type='weekly' AND bucket_start > now() - interval '90 days'))::float
/ NULLIF(sum(total) FILTER (WHERE bucket_type='weekly' AND bucket_start > now() - interval '90 days'), 0) AS pct_90d
FROM monitor_uptime_rollup
WHERE monitor_id = ANY(${idArray}::text[])
GROUP BY 1
`;

  if (ratioRows.length === 0) {
    // Rollup is empty for these monitors: aggregate directly from pings,
    // bounded by the widest (90d) window.
    ratioRows = await sql<any[]>`
SELECT monitor_id,
(count(*) FILTER (WHERE up AND checked_at > now() - interval '24 hours'))::float
/ NULLIF(count(*) FILTER (WHERE checked_at > now() - interval '24 hours'), 0) AS pct_24h,
(count(*) FILTER (WHERE up AND checked_at > now() - interval '7 days'))::float
/ NULLIF(count(*) FILTER (WHERE checked_at > now() - interval '7 days'), 0) AS pct_7d,
(count(*) FILTER (WHERE up AND checked_at > now() - interval '30 days'))::float
/ NULLIF(count(*) FILTER (WHERE checked_at > now() - interval '30 days'), 0) AS pct_30d,
(count(*) FILTER (WHERE up AND checked_at > now() - interval '90 days'))::float
/ NULLIF(count(*) FILTER (WHERE checked_at > now() - interval '90 days'), 0) AS pct_90d
FROM pings
WHERE monitor_id = ANY(${idArray}::text[])
AND checked_at > now() - interval '90 days'
GROUP BY 1
`;
  }

  // Ratio (0..1) → percentage with two decimals; null stays null.
  const asPercent = (ratio: any): number | null => {
    if (ratio == null) return null;
    return Number((Number(ratio) * 100).toFixed(2));
  };

  ratioRows.forEach((row) => {
    result[row.monitor_id] = {
      d24: asPercent(row.pct_24h),
      d7: asPercent(row.pct_7d),
      d30: asPercent(row.pct_30d),
      d90: asPercent(row.pct_90d),
    };
  });
  return result;
}
/** A monitor group on a status page, as read from `status_page_groups`. */
export interface GroupRow {
  id: string;
  name: string;
  /** Primary sort key; groups are ordered by position, then name. */
  position: number;
}
/** One entry in an incident's timeline, as read from `incident_updates`. */
export interface IncidentUpdateRow {
  id: string;
  status: string;
  /** Update body; presumably pre-rendered HTML (from the column name) — TODO confirm it is sanitised upstream. */
  body_html: string;
  /** Creation time; `Date` values from the driver are converted to ISO-8601 strings. */
  created_at: string;
}
/** An incident together with its complete update timeline. */
export interface IncidentSummary {
  id: string;
  title: string;
  status: string;
  severity: string;
  /** Pinned and unresolved incidents are the ones listed as "active" on a page. */
  pinned: boolean;
  started_at: string;
  /** `null` while the incident is unresolved. */
  resolved_at: string | null;
  /** Full timeline, newest first. */
  updates: IncidentUpdateRow[];
}
/**
 * Looks up a status page by its slug.
 *
 * @param slug Slug to match against `status_pages.slug`.
 * @returns The first matching row, or `null` when there is none.
 */
export async function loadStatusPage(slug: string): Promise<StatusPageRow | null> {
  const matches = await sql<StatusPageRow[]>`SELECT * FROM status_pages WHERE slug = ${slug}`;
  const first = matches[0];
  return first ?? null;
}
/**
 * Loads the monitor groups defined on a status page.
 *
 * @param pageId Status page id (`status_page_groups.status_page_id`).
 * @returns Groups ordered by position, then name.
 */
export async function loadGroups(pageId: string): Promise<GroupRow[]> {
  const groups = await sql<GroupRow[]>`
SELECT id, name, position FROM status_page_groups
WHERE status_page_id = ${pageId}
ORDER BY position ASC, name ASC
`;
  return groups;
}
/** Converts a driver timestamp to a string: ISO-8601 for `Date`, `String()` for anything else. */
function monitorTimestampToIso(value: unknown): string {
  return value instanceof Date ? value.toISOString() : String(value);
}

/**
 * Returns the ISO start timestamps of the `count` most recent buckets, oldest
 * first. Boundaries are computed in UTC; weekly buckets start on Monday.
 * NOTE(review): assumes the rollup writes `bucket_start` on UTC boundaries —
 * rows that do not line up with these slots are not matched.
 */
function monitorSlotStarts(bucket: "hourly" | "daily" | "weekly", count: number, now: Date): string[] {
  const newest = new Date(now);
  if (bucket === "hourly") newest.setUTCMinutes(0, 0, 0);
  else newest.setUTCHours(0, 0, 0, 0);
  if (bucket === "weekly") {
    // ISO week starts Monday.
    const daysSinceMonday = (newest.getUTCDay() + 6) % 7;
    newest.setUTCDate(newest.getUTCDate() - daysSinceMonday);
  }
  const bucketMs = bucket === "hourly" ? 3600_000 : bucket === "daily" ? 86_400_000 : 604_800_000;
  const newestMs = newest.getTime();
  return Array.from({ length: count }, (_, i) => new Date(newestMs - (count - 1 - i) * bucketMs).toISOString());
}

/** Collapses per-region states into one: any "down" wins, then any "up", otherwise "unknown". */
function monitorOverallState(regions: MonitorRow["region_states"]): MonitorRow["current_state"] {
  if (regions.some((r) => r.state === "down")) return "down";
  if (regions.some((r) => r.state === "up")) return "up";
  return "unknown";
}

/**
 * Loads every monitor attached to a status page together with its current
 * per-region state, uptime buckets for the requested window, multi-window
 * uptime, fastest-region latency and a short latency history.
 *
 * After the initial monitor lookup, the remaining reads are independent of
 * each other and are issued concurrently.
 *
 * @param pageId Status page id.
 * @param window Window that determines bucket granularity and count.
 * @param pageDisplayMode Page-level display mode used when a monitor has no valid override.
 * @returns One row per attached monitor, ordered by position then monitor name; `[]` if none are attached.
 */
export async function loadMonitors(pageId: string, window: Window, pageDisplayMode: "compact" | "expanded" = "expanded"): Promise<MonitorRow[]> {
  // Step 1: page → monitors with display overrides + group + position.
  const monitorRows = await sql<any[]>`
SELECT
spm.monitor_id AS id,
COALESCE(spm.display_name, m.name) AS display_name,
m.url,
spm.group_id,
spm.position,
spm.display_mode AS spm_display_mode
FROM status_page_monitors spm
JOIN monitors m ON m.id = spm.monitor_id
WHERE spm.status_page_id = ${pageId}
ORDER BY spm.position ASC, m.name ASC
`;
  if (monitorRows.length === 0) return [];
  const ids = monitorRows.map((r) => r.id);

  const { bucket, count } = WINDOW_TO_BUCKET[window];
  const truncUnit = bucket === "hourly" ? "hour" : bucket === "daily" ? "day" : "week";
  const intervalLiteral = `${count} ${truncUnit}s`;

  // Step 2: per-region current state for these monitors.
  const stateQuery = sql<{ monitor_id: string; region: string; last_state: string | null; updated_at: Date | string | null }[]>`
SELECT monitor_id, region, last_state, updated_at
FROM monitor_region_state
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
`;

  // Step 3: uptime rollup buckets covering the requested window, with a
  // bounded fallback to raw pings when the rollup has nothing for any of
  // these monitors (e.g. not backfilled yet, or the rollup job is broken).
  const loadBucketRows = async (): Promise<any[]> => {
    const fromRollup = await sql<any[]>`
SELECT monitor_id, bucket_start, sum(total)::int AS total, sum(up_count)::int AS up_count, avg(avg_latency)::real AS avg_latency
FROM monitor_uptime_rollup
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
AND bucket_type = ${bucket}
AND bucket_start > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
GROUP BY monitor_id, bucket_start
ORDER BY monitor_id, bucket_start ASC
`;
    if (fromRollup.length > 0) return fromRollup;
    // Group/order by ordinals — Postgres won't dedupe a $-parameterised
    // date_trunc() between SELECT and GROUP BY otherwise.
    return sql<any[]>`
SELECT
monitor_id,
date_trunc(${truncUnit}, checked_at) AS bucket_start,
count(*)::int AS total,
count(*) FILTER (WHERE up)::int AS up_count,
avg(latency_ms)::real AS avg_latency
FROM pings
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
AND checked_at > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
GROUP BY 1, 2
ORDER BY 1, 2 ASC
`;
  };

  // Step 5 query: recent latency history for the sparkline (hourly buckets from the last 30 hours).
  const latencyHistoryQuery = sql<any[]>`
SELECT monitor_id, region, bucket_start, avg_latency
FROM monitor_uptime_rollup
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
AND bucket_type = 'hourly'
AND bucket_start > now() - interval '30 hours'
ORDER BY monitor_id, bucket_start ASC
`;

  // None of these reads depends on another's result, so run them concurrently.
  const [stateRows, rollupRows, fastestLatency, multiWindow, latRows] = await Promise.all([
    stateQuery,
    loadBucketRows(),
    // Customer-facing latency = average of the fastest region for the window.
    loadFastestRegionLatency(ids, bucket, intervalLiteral),
    // Step 4: multi-window uptime row (24h / 7d / 30d / 90d) per monitor.
    loadMultiWindowUptime(ids),
    latencyHistoryQuery,
  ]);

  const stateByMonitor: Record<string, MonitorRow["region_states"]> = {};
  for (const s of stateRows) {
    const list = stateByMonitor[s.monitor_id] ?? (stateByMonitor[s.monitor_id] = []);
    list.push({
      region: s.region,
      state: (s.last_state as any) ?? "unknown",
      // Normalise to a string like every other timestamp this module returns;
      // the field is declared `string | null`.
      updated_at: s.updated_at == null ? null : monitorTimestampToIso(s.updated_at),
    });
  }

  // Index bucket data by (monitor_id, isoBucketStart) so missing slots can be filled in below.
  const indexed: Record<string, Record<string, { total: number; up: number }>> = {};
  for (const r of rollupRows) {
    const perMonitor = indexed[r.monitor_id] ?? (indexed[r.monitor_id] = {});
    perMonitor[monitorTimestampToIso(r.bucket_start)] = { total: r.total, up: r.up_count };
  }

  // Generate the full sequence of expected bucket timestamps so empty bars
  // render as "no data" instead of disappearing entirely.
  const slotIsos = monitorSlotStarts(bucket, count, new Date());
  const bucketsByMonitor: Record<string, MonitorRow["buckets"]> = {};
  for (const id of ids) {
    const slotMap = indexed[id] ?? {};
    bucketsByMonitor[id] = slotIsos.map((iso) => {
      const hit = slotMap[iso];
      return hit ? { start: iso, total: hit.total, up: hit.up } : { start: iso, total: 0, up: 0 };
    });
  }

  const latencyByMonitor: Record<string, MonitorRow["latency_history"]> = {};
  for (const r of latRows) {
    const list = latencyByMonitor[r.monitor_id] ?? (latencyByMonitor[r.monitor_id] = []);
    list.push({
      region: r.region,
      latency_ms: r.avg_latency != null ? Math.round(r.avg_latency) : null,
      ts: monitorTimestampToIso(r.bucket_start),
    });
  }

  return monitorRows.map((m): MonitorRow => {
    const region_states = stateByMonitor[m.id] ?? [];
    const buckets = bucketsByMonitor[m.id] ?? [];

    // Uptime over the requested window, from the slot-filled buckets.
    const tot = buckets.reduce((a, b) => a + b.total, 0);
    const upT = buckets.reduce((a, b) => a + b.up, 0);
    const uptime_pct = tot > 0 ? +(100 * upT / tot).toFixed(2) : null;

    // Per-monitor display mode override → page default → 'expanded'.
    const display_mode: MonitorRow["display_mode"] =
      m.spm_display_mode === "compact" || m.spm_display_mode === "expanded"
        ? m.spm_display_mode
        : pageDisplayMode;

    return {
      id: m.id,
      display_name: m.display_name,
      url: m.url,
      group_id: m.group_id,
      position: m.position,
      display_mode,
      current_state: monitorOverallState(region_states),
      region_states,
      uptime_pct,
      uptime: multiWindow[m.id] ?? { d24: null, d7: null, d30: null, d90: null },
      buckets,
      avg_latency: fastestLatency[m.id] ?? null,
      latency_history: latencyByMonitor[m.id] ?? [],
    };
  });
}
/**
 * Loads the 50 most recently started incidents attached to a status page,
 * each with its full update timeline (newest first).
 *
 * @param pageId Status page id.
 * @returns `active`: pinned incidents that are not resolved; `recent`: all others.
 *          Both lists keep the newest-started-first order.
 */
export async function loadIncidents(pageId: string): Promise<{ active: IncidentSummary[]; recent: IncidentSummary[] }> {
  const incidentRows = await sql<any[]>`
SELECT i.*
FROM incidents i
JOIN incident_status_pages isp ON isp.incident_id = i.id
WHERE isp.status_page_id = ${pageId}
ORDER BY i.started_at DESC
LIMIT 50
`;
  if (incidentRows.length === 0) return { active: [], recent: [] };

  const incidentIds = incidentRows.map((row) => row.id);

  // Fetch every update for these incidents in one query so the public page can
  // show the entire course of events on both active and resolved incidents.
  const updateRows = await sql<any[]>`
SELECT id, incident_id, status, body_html, created_at
FROM incident_updates
WHERE incident_id = ANY(${sql.array(incidentIds)}::uuid[])
ORDER BY created_at DESC
`;

  const asIso = (value: any): string => (value instanceof Date ? value.toISOString() : String(value));

  // Group updates by incident; query order (newest first) is preserved within each group.
  const timelines: Record<string, IncidentUpdateRow[]> = {};
  for (const update of updateRows) {
    const timeline = timelines[update.incident_id] || (timelines[update.incident_id] = []);
    timeline.push({
      id: update.id,
      status: update.status,
      body_html: update.body_html,
      created_at: asIso(update.created_at),
    });
  }

  // Single pass: build each summary and route it to the matching list.
  const active: IncidentSummary[] = [];
  const recent: IncidentSummary[] = [];
  for (const row of incidentRows) {
    const summary: IncidentSummary = {
      id: row.id,
      title: row.title,
      status: row.status,
      severity: row.severity,
      pinned: row.pinned,
      started_at: asIso(row.started_at),
      resolved_at: row.resolved_at ? asIso(row.resolved_at) : null,
      updates: timelines[row.id] ?? [],
    };
    if (summary.pinned && !summary.resolved_at) {
      active.push(summary);
    } else {
      recent.push(summary);
    }
  }
  return { active, recent };
}
/** Payload for the single-monitor detail view. */
export interface MonitorDetailPayload {
  monitor: MonitorRow;
  /** Recent incidents that touch this monitor, newest first. */
  incidents: IncidentSummary[];
  /** ISO-8601 timestamp taken when the payload was assembled. */
  generated_at: string;
}
/**
 * Builds the detail payload for one monitor on a status page.
 *
 * @param slug Status page slug.
 * @param monitorId Monitor to load; must be attached to the page.
 * @param window Optional window override; defaults to the page's `default_window`.
 * @returns The payload, or `null` when the page does not exist or the monitor
 *          is not attached to it.
 */
export async function loadMonitorDetail(slug: string, monitorId: string, window?: Window): Promise<MonitorDetailPayload | null> {
  const page = await loadStatusPage(slug);
  if (!page) return null;

  // Guard: the monitor must actually be attached to this page. Only the
  // existence of the row is used here.
  const linkRows = await sql<any[]>`
SELECT spm.monitor_id, COALESCE(spm.display_name, m.name) AS display_name, m.url, spm.group_id, spm.position
FROM status_page_monitors spm
JOIN monitors m ON m.id = spm.monitor_id
WHERE spm.status_page_id = ${page.id} AND spm.monitor_id = ${monitorId}
`;
  if (!linkRows[0]) return null;

  const effectiveWindow = (window ?? page.default_window) as Window;

  // Reuse the bulk loader so the bucket/state logic lives in one place. This
  // loads every monitor on the page; the requested one is picked from the result.
  const pageMonitors = await loadMonitors(page.id, effectiveWindow, page.display_mode);
  const monitor = pageMonitors.find((candidate) => candidate.id === monitorId);
  if (!monitor) return null;

  // Incidents touching this monitor (any status), most recent 20.
  // NOTE(review): scoped by monitor and account only, not by
  // `incident_status_pages` — confirm incidents not attached to this page are
  // meant to be visible here.
  const incidentRows = await sql<any[]>`
SELECT i.*
FROM incidents i
JOIN incident_monitors im ON im.incident_id = i.id
WHERE im.monitor_id = ${monitorId} AND i.account_id = ${page.account_id}
ORDER BY i.started_at DESC
LIMIT 20
`;
  if (incidentRows.length === 0) {
    return { monitor, incidents: [], generated_at: new Date().toISOString() };
  }

  // Full timeline for those incidents, newest update first.
  const incidentIds = incidentRows.map((row) => row.id);
  const updateRows = await sql<any[]>`
SELECT id, incident_id, status, body_html, created_at
FROM incident_updates
WHERE incident_id = ANY(${sql.array(incidentIds)}::uuid[])
ORDER BY created_at DESC
`;

  const asIso = (value: any): string => (value instanceof Date ? value.toISOString() : String(value));

  const timelines: Record<string, IncidentUpdateRow[]> = {};
  updateRows.forEach((update) => {
    const timeline = timelines[update.incident_id] || (timelines[update.incident_id] = []);
    timeline.push({
      id: update.id,
      status: update.status,
      body_html: update.body_html,
      created_at: asIso(update.created_at),
    });
  });

  const incidents: IncidentSummary[] = incidentRows.map((row) => ({
    id: row.id,
    title: row.title,
    status: row.status,
    severity: row.severity,
    pinned: row.pinned,
    started_at: asIso(row.started_at),
    resolved_at: row.resolved_at ? asIso(row.resolved_at) : null,
    updates: timelines[row.id] ?? [],
  }));

  return { monitor, incidents, generated_at: new Date().toISOString() };
}
/** Everything needed to render a public status page in one object. */
export interface PagePayload {
  /** Page row without `password_hash`; `has_password` reports whether one was set. */
  page: Omit<StatusPageRow, "password_hash"> & { has_password: boolean };
  groups: GroupRow[];
  monitors: MonitorRow[];
  incidents: {
    active: IncidentSummary[];
    recent: IncidentSummary[];
  };
  /** ISO-8601 timestamp taken when the payload was assembled. */
  generated_at: string;
}
/**
 * Assembles the full public payload for a status page.
 *
 * @param slug Status page slug.
 * @param window Optional window override; defaults to the page's `default_window`.
 * @returns The payload, or `null` when no page has that slug. The page's
 *          `password_hash` is never included — only a `has_password` flag.
 */
export async function loadPagePayload(slug: string, window?: Window): Promise<PagePayload | null> {
  const page = await loadStatusPage(slug);
  if (!page) return null;

  const effectiveWindow = (window ?? page.default_window) as Window;

  // Groups, monitors and incidents are independent: start all three, then wait.
  const pendingGroups = loadGroups(page.id);
  const pendingMonitors = loadMonitors(page.id, effectiveWindow, page.display_mode);
  const pendingIncidents = loadIncidents(page.id);
  const [groups, monitors, incidents] = await Promise.all([pendingGroups, pendingMonitors, pendingIncidents]);

  // Strip the hash; expose only whether a password is set.
  const { password_hash: passwordHash, ...publicFields } = page;
  const publicPage = { ...publicFields, has_password: !!passwordHash };

  return {
    page: publicPage,
    groups,
    monitors,
    incidents,
    generated_at: new Date().toISOString(),
  };
}