update region math
This commit is contained in:
parent
1732a9d055
commit
264a51384c
|
|
@ -58,56 +58,6 @@ export interface MonitorRow {
|
||||||
latency_history: Array<{ region: string; latency_ms: number | null; ts: string }>;
|
latency_history: Array<{ region: string; latency_ms: number | null; ts: string }>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
 * Resolves, for each requested monitor, the average latency of its fastest
 * region over the given window.
 *
 * Status pages are customer-facing, so the headline figure comes from the
 * best-performing region rather than a cross-region mean that one distant
 * region would drag down.
 *
 * @param monitorIds - Monitors to report on; every id gets an entry in the result.
 * @param bucket - Bucket type used to filter `monitor_uptime_rollup` rows.
 * @param intervalLiteral - Text cast to a Postgres `interval`; bounds the window back from `now()`.
 * @returns Map of monitor id to latency rounded to whole ms, or `null` when no
 *          latency row was found for that monitor.
 */
export async function loadFastestRegionLatency(
  monitorIds: string[],
  bucket: BucketType,
  intervalLiteral: string,
): Promise<Record<string, number | null>> {
  const fastest: Record<string, number | null> = {};
  if (monitorIds.length === 0) return fastest;

  // Seed every requested monitor so it is present in the result even when
  // neither query returns a row for it.
  monitorIds.forEach((monitorId) => {
    fastest[monitorId] = null;
  });

  const ids = sql.array(monitorIds);

  // Primary source: per-region average from the rollup table, weighted by the
  // number of checks in each bucket. NULLIF guards the division when the
  // summed totals are zero (the result is then NULL).
  let regionRows = await sql<any[]>`
    SELECT monitor_id, region,
      (sum(avg_latency * total) / NULLIF(sum(total), 0))::float AS avg_lat
    FROM monitor_uptime_rollup
    WHERE monitor_id = ANY(${ids}::text[])
      AND bucket_type = ${bucket}
      AND bucket_start > now() - ${intervalLiteral}::interval
      AND avg_latency IS NOT NULL
    GROUP BY 1, 2
  `;

  if (regionRows.length === 0) {
    // Fallback while rollup is unpopulated. Bounded by the same window so cheap.
    regionRows = await sql<any[]>`
      SELECT monitor_id, COALESCE(region, 'default') AS region,
        avg(latency_ms)::float AS avg_lat
      FROM pings
      WHERE monitor_id = ANY(${ids}::text[])
        AND checked_at > now() - ${intervalLiteral}::interval
        AND latency_ms IS NOT NULL
      GROUP BY 1, 2
    `;
  }

  // One row per (monitor, region): retain the smallest regional average seen
  // for each monitor.
  for (const row of regionRows) {
    const candidate: number | null | undefined = row.avg_lat;
    if (candidate == null) continue;
    const best = fastest[row.monitor_id];
    if (best == null || candidate < best) fastest[row.monitor_id] = candidate;
  }

  // Round to integer ms; monitors without data stay null.
  for (const [monitorId, latency] of Object.entries(fastest)) {
    if (latency != null) fastest[monitorId] = Math.round(latency);
  }

  return fastest;
}
|
|
||||||
|
|
||||||
// Single SQL pass that produces all four uptime windows for a set of monitors.
|
// Single SQL pass that produces all four uptime windows for a set of monitors.
|
||||||
// Reads only the rollup table; falls back to a pings aggregate when the rollup
|
// Reads only the rollup table; falls back to a pings aggregate when the rollup
|
||||||
// has nothing for these monitors yet (same pattern as loadMonitors).
|
// has nothing for these monitors yet (same pattern as loadMonitors).
|
||||||
|
|
@ -238,18 +188,20 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
// Step 3: uptime rollup buckets covering the requested window.
|
// Step 3: uptime rollup buckets covering the requested window. We keep
|
||||||
|
// region in the result so JS can pick the fastest region per monitor and
|
||||||
|
// emit per-bucket latency from just that region (status pages are
|
||||||
|
// customer-facing, we show our best foot forward).
|
||||||
const { bucket, count } = WINDOW_TO_BUCKET[window];
|
const { bucket, count } = WINDOW_TO_BUCKET[window];
|
||||||
const truncUnit = bucket === "hourly" ? "hour" : "day";
|
const truncUnit = bucket === "hourly" ? "hour" : "day";
|
||||||
const intervalLiteral = `${count} ${truncUnit}s`;
|
const intervalLiteral = `${count} ${truncUnit}s`;
|
||||||
let rollupRows = await sql<any[]>`
|
let rollupRows = await sql<any[]>`
|
||||||
SELECT monitor_id, bucket_start, sum(total)::int AS total, sum(up_count)::int AS up_count, avg(avg_latency)::real AS avg_latency
|
SELECT monitor_id, region, bucket_start, total, up_count, avg_latency
|
||||||
FROM monitor_uptime_rollup
|
FROM monitor_uptime_rollup
|
||||||
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
|
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
|
||||||
AND bucket_type = ${bucket}
|
AND bucket_type = ${bucket}
|
||||||
AND bucket_start > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
|
AND bucket_start > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
|
||||||
GROUP BY monitor_id, bucket_start
|
ORDER BY monitor_id, region, bucket_start ASC
|
||||||
ORDER BY monitor_id, bucket_start ASC
|
|
||||||
`;
|
`;
|
||||||
|
|
||||||
// Fallback: if the rollup table has nothing for any of these monitors in
|
// Fallback: if the rollup table has nothing for any of these monitors in
|
||||||
|
|
@ -257,11 +209,10 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
|
||||||
// silently broken), aggregate directly from pings. Bounded by the window so
|
// silently broken), aggregate directly from pings. Bounded by the window so
|
||||||
// it stays cheap. Once the rollup catches up this branch never fires.
|
// it stays cheap. Once the rollup catches up this branch never fires.
|
||||||
if (rollupRows.length === 0) {
|
if (rollupRows.length === 0) {
|
||||||
// Group/order by ordinals — Postgres won't dedupe a $-parameterised
|
|
||||||
// date_trunc() between SELECT and GROUP BY otherwise.
|
|
||||||
rollupRows = await sql<any[]>`
|
rollupRows = await sql<any[]>`
|
||||||
SELECT
|
SELECT
|
||||||
monitor_id,
|
monitor_id,
|
||||||
|
COALESCE(region, 'default') AS region,
|
||||||
date_trunc(${truncUnit}, checked_at) AS bucket_start,
|
date_trunc(${truncUnit}, checked_at) AS bucket_start,
|
||||||
count(*)::int AS total,
|
count(*)::int AS total,
|
||||||
count(*) FILTER (WHERE up)::int AS up_count,
|
count(*) FILTER (WHERE up)::int AS up_count,
|
||||||
|
|
@ -269,21 +220,59 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
|
||||||
FROM pings
|
FROM pings
|
||||||
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
|
WHERE monitor_id = ANY(${sql.array(ids)}::text[])
|
||||||
AND checked_at > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
|
AND checked_at > date_trunc(${truncUnit}, now()) - ${intervalLiteral}::interval
|
||||||
GROUP BY 1, 2
|
GROUP BY 1, 2, 3
|
||||||
ORDER BY 1, 2 ASC
|
ORDER BY 1, 2, 3 ASC
|
||||||
`;
|
`;
|
||||||
}
|
}
|
||||||
// Index actual rollup data by (monitor_id, isoBucketStart) so we can fill in
|
|
||||||
// the missing slots below.
|
// Single pass over rollup rows builds three indices:
|
||||||
const indexed: Record<string, Record<string, { total: number; up: number; avg_latency: number | null }>> = {};
|
// indexed[mid][isoStart] → cross-region {total, up} for bar coloring
|
||||||
|
// regionLat[mid][region] → cross-window weighted latency for picking fastest region
|
||||||
|
// regionBucketLat[mid][region][isoStart] → per-bucket latency for the fastest-region tooltip lookup
|
||||||
|
const indexed: Record<string, Record<string, { total: number; up: number }>> = {};
|
||||||
|
const regionLat: Record<string, Record<string, { sum: number; n: number }>> = {};
|
||||||
|
const regionBucketLat: Record<string, Record<string, Record<string, number>>> = {};
|
||||||
for (const r of rollupRows) {
|
for (const r of rollupRows) {
|
||||||
const startIso = r.bucket_start instanceof Date ? r.bucket_start.toISOString() : String(r.bucket_start);
|
const startIso = r.bucket_start instanceof Date ? r.bucket_start.toISOString() : String(r.bucket_start);
|
||||||
|
|
||||||
|
// Cross-region bucket totals (for bar coloring)
|
||||||
if (!indexed[r.monitor_id]) indexed[r.monitor_id] = {};
|
if (!indexed[r.monitor_id]) indexed[r.monitor_id] = {};
|
||||||
indexed[r.monitor_id]![startIso] = { total: r.total, up: r.up_count, avg_latency: r.avg_latency ?? null };
|
const slot = indexed[r.monitor_id]![startIso] ?? { total: 0, up: 0 };
|
||||||
|
slot.total += Number(r.total);
|
||||||
|
slot.up += Number(r.up_count);
|
||||||
|
indexed[r.monitor_id]![startIso] = slot;
|
||||||
|
|
||||||
|
// Per-region latency tracking
|
||||||
|
if (r.avg_latency != null && Number(r.total) > 0) {
|
||||||
|
if (!regionLat[r.monitor_id]) regionLat[r.monitor_id] = {};
|
||||||
|
const acc = regionLat[r.monitor_id]![r.region] ?? { sum: 0, n: 0 };
|
||||||
|
acc.sum += Number(r.avg_latency) * Number(r.total);
|
||||||
|
acc.n += Number(r.total);
|
||||||
|
regionLat[r.monitor_id]![r.region] = acc;
|
||||||
|
|
||||||
|
if (!regionBucketLat[r.monitor_id]) regionBucketLat[r.monitor_id] = {};
|
||||||
|
if (!regionBucketLat[r.monitor_id]![r.region]) regionBucketLat[r.monitor_id]![r.region] = {};
|
||||||
|
regionBucketLat[r.monitor_id]![r.region]![startIso] = Math.round(Number(r.avg_latency));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Pick the fastest region per monitor (lowest weighted average latency over
|
||||||
|
// the whole window). All per-bucket latency display falls back to this
|
||||||
|
// region's per-bucket numbers; the per-monitor avg_latency uses the same.
|
||||||
|
const fastestRegionByMonitor: Record<string, string | null> = {};
|
||||||
|
const fastestLatency: Record<string, number | null> = {};
|
||||||
|
for (const id of ids) {
|
||||||
|
let bestRegion: string | null = null;
|
||||||
|
let bestAvg = Infinity;
|
||||||
|
const regions = regionLat[id] ?? {};
|
||||||
|
for (const [region, acc] of Object.entries(regions)) {
|
||||||
|
if (acc.n === 0) continue;
|
||||||
|
const avg = acc.sum / acc.n;
|
||||||
|
if (avg < bestAvg) { bestAvg = avg; bestRegion = region; }
|
||||||
|
}
|
||||||
|
fastestRegionByMonitor[id] = bestRegion;
|
||||||
|
fastestLatency[id] = bestRegion != null ? Math.round(bestAvg) : null;
|
||||||
}
|
}
|
||||||
// Customer-facing latency = average of the fastest region for the page's
|
|
||||||
// window. Computed via a separate query that retains per-region info.
|
|
||||||
const fastestLatency = await loadFastestRegionLatency(ids, bucket, intervalLiteral);
|
|
||||||
|
|
||||||
// Generate the full sequence of expected bucket timestamps so empty bars
|
// Generate the full sequence of expected bucket timestamps so empty bars
|
||||||
// render as "no data" instead of disappearing entirely. Truncate `now()` to
|
// render as "no data" instead of disappearing entirely. Truncate `now()` to
|
||||||
|
|
@ -303,10 +292,13 @@ export async function loadMonitors(pageId: string, window: Window, pageDisplayMo
|
||||||
const bucketsByMonitor: Record<string, MonitorRow["buckets"]> = {};
|
const bucketsByMonitor: Record<string, MonitorRow["buckets"]> = {};
|
||||||
for (const id of ids) {
|
for (const id of ids) {
|
||||||
const slotMap = indexed[id] ?? {};
|
const slotMap = indexed[id] ?? {};
|
||||||
|
const bestRegion = fastestRegionByMonitor[id];
|
||||||
|
const fastestBuckets = bestRegion ? regionBucketLat[id]?.[bestRegion] ?? {} : {};
|
||||||
bucketsByMonitor[id] = slotIsos.map((iso) => {
|
bucketsByMonitor[id] = slotIsos.map((iso) => {
|
||||||
const hit = slotMap[iso];
|
const hit = slotMap[iso];
|
||||||
|
const lat = fastestBuckets[iso] ?? null;
|
||||||
return hit
|
return hit
|
||||||
? { start: iso, total: hit.total, up: hit.up, avg_latency: hit.avg_latency != null ? Math.round(hit.avg_latency) : null }
|
? { start: iso, total: hit.total, up: hit.up, avg_latency: lat }
|
||||||
: { start: iso, total: 0, up: 0, avg_latency: null };
|
: { start: iso, total: 0, up: 0, avg_latency: null };
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue