update region math

This commit is contained in:
nate 2026-04-09 01:39:08 +04:00
parent 1732a9d055
commit 264a51384c
1 changed files with 58 additions and 66 deletions

View File

@ -58,56 +58,6 @@ export interface MonitorRow {
latency_history: Array<{ region: string; latency_ms: number | null; ts: string }>;
}
/**
 * Resolves, for each requested monitor, the mean latency of its *fastest*
 * region over the given window.
 *
 * Status pages are customer-facing, so the number shown is the best region's
 * average rather than a cross-region mean that one distant region can drag down.
 *
 * @param monitorIds      Monitors to report on; an empty list returns `{}` without querying.
 * @param bucket          Rollup bucket type to read (matched against `bucket_type`).
 * @param intervalLiteral Window size, cast to a Postgres `interval` in the query.
 * @returns Map of monitor id to latency in whole milliseconds, or `null` when
 *          no latency rows were returned for that monitor.
 */
export async function loadFastestRegionLatency(
  monitorIds: string[],
  bucket: BucketType,
  intervalLiteral: string,
): Promise<Record<string, number | null>> {
  const fastest: Record<string, number | null> = {};
  if (monitorIds.length === 0) return fastest;

  // Every requested monitor gets an entry, even if nothing comes back for it.
  for (const monitorId of monitorIds) fastest[monitorId] = null;

  const idArray = sql.array(monitorIds);

  // Primary source: per-region averages from the rollup, weighted by each
  // bucket's sample count.
  const fromRollup = await sql<any[]>`
SELECT monitor_id, region,
(sum(avg_latency * total) / NULLIF(sum(total), 0))::float AS avg_lat
FROM monitor_uptime_rollup
WHERE monitor_id = ANY(${idArray}::text[])
AND bucket_type = ${bucket}
AND bucket_start > now() - ${intervalLiteral}::interval
AND avg_latency IS NOT NULL
GROUP BY 1, 2
`;

  // Fallback while the rollup is unpopulated: aggregate raw pings over the same
  // window. Only fires when the rollup returned no rows for the whole set.
  const regionRows =
    fromRollup.length > 0
      ? fromRollup
      : await sql<any[]>`
SELECT monitor_id, COALESCE(region, 'default') AS region,
avg(latency_ms)::float AS avg_lat
FROM pings
WHERE monitor_id = ANY(${idArray}::text[])
AND checked_at > now() - ${intervalLiteral}::interval
AND latency_ms IS NOT NULL
GROUP BY 1, 2
`;

  // Track the lowest (unrounded) regional average seen per monitor.
  const lowestByMonitor = new Map<string, number>();
  for (const row of regionRows) {
    if (row.avg_lat == null) continue;
    const previous = lowestByMonitor.get(row.monitor_id);
    if (previous === undefined || row.avg_lat < previous) {
      lowestByMonitor.set(row.monitor_id, row.avg_lat);
    }
  }

  // Publish the winners, rounded to integer milliseconds.
  for (const [monitorId, lowest] of lowestByMonitor) {
    fastest[monitorId] = Math.round(lowest);
  }
  return fastest;
}
// Single SQL pass that produces all four uptime windows for a set of monitors.
// Reads only the rollup table; falls back to a pings aggregate when the rollup
// has nothing for these monitors yet (same pattern as loadMonitors).
@ -238,18 +188,20 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
});
}
// Step 3: uptime rollup buckets covering the requested window.
// Step 3: uptime rollup buckets covering the requested window. We keep
// region in the result so JS can pick the fastest region per monitor and
// emit per-bucket latency from just that region (status pages are
// customer-facing, we show our best foot forward).
const { bucket, count } = WINDOW_TO_BUCKET[window];
const truncUnit = bucket === "hourly" ? "hour" : "day";
const intervalLiteral = `${count} ${truncUnit}s`;
let rollupRows = await sql<any[]>`
SELECT monitor_id, bucket_start, sum(total)::int AS total, sum(up_count)::int AS up_count, avg(avg_latency)::real AS avg_latency
SELECT monitor_id, region, bucket_start, total, up_count, avg_latency
FROM monitor_uptime_rollup
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
AND bucket_type = ${bucket}
AND bucket_start > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
GROUP BY monitor_id, bucket_start
ORDER BY monitor_id, bucket_start ASC
ORDER BY monitor_id, region, bucket_start ASC
`;
// Fallback: if the rollup table has nothing for any of these monitors in
@ -257,11 +209,10 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
// silently broken), aggregate directly from pings. Bounded by the window so
// it stays cheap. Once the rollup catches up this branch never fires.
if (rollupRows.length === 0) {
// Group/order by ordinals — Postgres won't dedupe a $-parameterised
// date_trunc() between SELECT and GROUP BY otherwise.
rollupRows = await sql<any[]>`
SELECT
monitor_id,
COALESCE(region, 'default') AS region,
date_trunc(${truncUnit}, checked_at) AS bucket_start,
count(*)::int AS total,
count(*) FILTER (WHERE up)::int AS up_count,
@ -269,21 +220,59 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
FROM pings
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
AND checked_at > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
GROUP BY 1, 2
ORDER BY 1, 2 ASC
GROUP BY 1, 2, 3
ORDER BY 1, 2, 3 ASC
`;
}
// Index actual rollup data by (monitor_id, isoBucketStart) so we can fill in
// the missing slots below.
const indexed: Record<string, Record<string, { total: number; up: number; avg_latency: number | null }>> = {};
// Single pass over rollup rows builds three indices:
// indexed[mid][isoStart] → cross-region {total, up} for bar coloring
// regionLat[mid][region] → cross-window weighted latency for picking fastest region
// regionBucketLat[mid][region][isoStart] → per-bucket latency for the fastest-region tooltip lookup
const indexed: Record<string, Record<string, { total: number; up: number }>> = {};
const regionLat: Record<string, Record<string, { sum: number; n: number }>> = {};
const regionBucketLat: Record<string, Record<string, Record<string, number>>> = {};
for (const r of rollupRows) {
const startIso = r.bucket_start instanceof Date ? r.bucket_start.toISOString() : String(r.bucket_start);
// Cross-region bucket totals (for bar coloring)
if (!indexed[r.monitor_id]) indexed[r.monitor_id] = {};
indexed[r.monitor_id]![startIso] = { total: r.total, up: r.up_count, avg_latency: r.avg_latency ?? null };
const slot = indexed[r.monitor_id]![startIso] ?? { total: 0, up: 0 };
slot.total += Number(r.total);
slot.up += Number(r.up_count);
indexed[r.monitor_id]![startIso] = slot;
// Per-region latency tracking
if (r.avg_latency != null && Number(r.total) > 0) {
if (!regionLat[r.monitor_id]) regionLat[r.monitor_id] = {};
const acc = regionLat[r.monitor_id]![r.region] ?? { sum: 0, n: 0 };
acc.sum += Number(r.avg_latency) * Number(r.total);
acc.n += Number(r.total);
regionLat[r.monitor_id]![r.region] = acc;
if (!regionBucketLat[r.monitor_id]) regionBucketLat[r.monitor_id] = {};
if (!regionBucketLat[r.monitor_id]![r.region]) regionBucketLat[r.monitor_id]![r.region] = {};
regionBucketLat[r.monitor_id]![r.region]![startIso] = Math.round(Number(r.avg_latency));
}
}
// Pick the fastest region per monitor (lowest weighted average latency over
// the whole window). All per-bucket latency display falls back to this
// region's per-bucket numbers; the per-monitor avg_latency uses the same.
const fastestRegionByMonitor: Record<string, string | null> = {};
const fastestLatency: Record<string, number | null> = {};
for (const id of ids) {
let bestRegion: string | null = null;
let bestAvg = Infinity;
const regions = regionLat[id] ?? {};
for (const [region, acc] of Object.entries(regions)) {
if (acc.n === 0) continue;
const avg = acc.sum / acc.n;
if (avg < bestAvg) { bestAvg = avg; bestRegion = region; }
}
fastestRegionByMonitor[id] = bestRegion;
fastestLatency[id] = bestRegion != null ? Math.round(bestAvg) : null;
}
// Customer-facing latency = average of the fastest region for the page's
// window. Computed via a separate query that retains per-region info.
const fastestLatency = await loadFastestRegionLatency(ids, bucket, intervalLiteral);
// Generate the full sequence of expected bucket timestamps so empty bars
// render as "no data" instead of disappearing entirely. Truncate `now()` to
@ -303,10 +292,13 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
const bucketsByMonitor: Record<string, MonitorRow["buckets"]> = {};
for (const id of ids) {
const slotMap = indexed[id] ?? {};
const bestRegion = fastestRegionByMonitor[id];
const fastestBuckets = bestRegion ? regionBucketLat[id]?.[bestRegion] ?? {} : {};
bucketsByMonitor[id] = slotIsos.map((iso) => {
const hit = slotMap[iso];
const lat = fastestBuckets[iso] ?? null;
return hit
? { start: iso, total: hit.total, up: hit.up, avg_latency: hit.avg_latency != null ? Math.round(hit.avg_latency) : null }
? { start: iso, total: hit.total, up: hit.up, avg_latency: lat }
: { start: iso, total: 0, up: 0, avg_latency: null };
});
}