pingql/apps/status/src/data.ts

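// Loads the read-only data needed to render a public status page. The normal
// path never reads the raw `pings` table - it uses `monitor_region_state` for
// current state and `monitor_uptime_rollup` for historical uptime windows.
// The single exception is the bounded fallback in loadMonitors, which
// aggregates directly from `pings` only when the rollup query returns no rows.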

import sql from "./db";

/** Granularity of a rollup bucket, and therefore of one bar in the uptime chart. */
export type BucketType = "hourly" | "daily";

/**
 * A row of the `status_pages` table as selected by loadStatusPage /
 * loadStatusPageByDomain. This is the internal shape: `id`, `account_id` and
 * `password_hash` are dropped by redactPageForPublic before the page is
 * emitted to visitors.
 */
export interface StatusPageRow {
  id:                string;
  account_id:        string;
  slug:              string;
  title:             string;
  description:       string | null;
  theme:             "auto" | "light" | "dark";
  // null when the page has no password; exposed publicly only as `has_password`.
  password_hash:     string | null;
  index_search:      boolean;
  show_powered_by:   boolean;
  show_response_time:boolean;

  // Bar chart configuration, handed to loadMonitors as barFrequency / barCount.
  bar_frequency:     BucketType;
  bar_count:         number;
  // Matched exactly by loadStatusPageByDomain and verifyDomain.
  custom_domain:     string | null;
  custom_css:        string | null;
  footer_text:       string | null;
  // Presumably an interval in seconds (from the `_s` suffix) - TODO confirm.
  auto_refresh_s:    number;
}

/**
 * One monitor as rendered on a status page: identity, placement, current
 * state and the derived uptime / latency series. Produced by loadMonitors.
 */
export interface MonitorRow {
  id:           string;
  display_name: string;
  // Note: the underlying monitor.url is intentionally NOT exposed on the
  // public payload. Status pages display `display_name`; the literal target
  // URL (which can contain auth tokens, internal hostnames, staging paths,
  // etc.) must never leak to anonymous visitors via the JSON endpoint.

  // Group correlator. loadMonitors fills this with the raw group UUID;
  // redactGroupsAndMonitors then replaces it with the group's index in the
  // ordered group list (0-based, as a string), NOT the underlying UUID, so
  // the JSON doesn't leak internal IDs. The HTML render works the same either
  // way - it just looks up groups by this token.
  group_id:     string | null;
  position:     number;
  // 'paused' means the monitor was disabled in the dashboard - the runner has
  // stopped checking it, and the public page should treat it as planned
  // maintenance rather than an outage.
  current_state: "up" | "down" | "unknown" | "paused";
  // One entry per region that has a `monitor_region_state` row.
  region_states: Array<{ region: string; state: "up" | "down" | "unknown"; updated_at: string | null }>;
  // Percentage (0-100) of successful checks across `buckets`; null when the
  // buckets contain no checks at all.
  uptime_pct:   number | null;
  buckets:      Array<{ start: string; total: number; up: number; avg_latency: number | null }>; // bar chart input
  // Rounded check-weighted mean latency of the fastest region; null when no
  // latency data is available.
  avg_latency:  number | null;
  // Hourly latency points from the last 30h, sorted by `ts` ascending.
  latency_history: Array<{ region: string; latency_ms: number | null; ts: string }>;
}

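// Multi-window uptime (24h / 7d / 30d / 90d) is meant to be derived from the
// same rollup row set that loadMonitors pulls for the bar chart - see the
// in-JS aggregation pass below. This used to be a second SQL round-trip
// running four FILTER aggregates that redid arithmetic the raw bucket rows
// already contained.
// NOTE(review): MonitorRow currently carries a single `uptime_pct` computed
// over the bar window only; confirm where (or whether) the per-window figures
// are still surfaced.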

/**
 * A row of `status_page_groups` as selected by loadGroups. `id` is the
 * internal identifier; redactGroupsAndMonitors replaces it before the group
 * is emitted publicly.
 */
export interface GroupRow {
  id:       string;
  name:     string;
  position: number;
}

/** One timeline entry of an incident, read from `incident_updates`. */
export interface IncidentUpdateRow {
  id:         string;
  status:     string;
  // Body as stored in the `body_html` column.
  // NOTE(review): presumably sanitised before it is stored - confirm, since
  // this module passes it through unchanged.
  body_html:  string;
  // ISO-8601 string when the driver returns a Date, otherwise String(value).
  created_at: string;
}

/** An incident together with its full update timeline, as emitted to the page. */
export interface IncidentSummary {
  id:          string;
  title:       string;
  status:      string;
  severity:    string;
  // loadIncidents lists an incident as "active" only when it is pinned AND
  // has no resolved_at.
  pinned:      boolean;
  started_at:  string;
  // null while the incident is unresolved.
  resolved_at: string | null;
  updates:     IncidentUpdateRow[];   // full timeline, newest first
}

/**
 * Fetches the status page whose `slug` matches exactly.
 *
 * @returns the first matching row, or null when there is none
 */
export async function loadStatusPage(slug: string): Promise<StatusPageRow | null> {
  const matches = await sql<StatusPageRow[]>`SELECT * FROM status_pages WHERE slug = ${slug}`;
  return matches[0] ?? null;
}

/**
 * Fetches the status page whose `custom_domain` equals `domain` exactly (no
 * normalisation is applied to the input here).
 *
 * @returns the first matching row, or null when there is none
 */
export async function loadStatusPageByDomain(domain: string): Promise<StatusPageRow | null> {
  const matches = await sql<StatusPageRow[]>`SELECT * FROM status_pages WHERE custom_domain = ${domain}`;
  return matches[0] ?? null;
}

/**
 * Reports whether any status page has `domain` configured as its custom
 * domain. Only the id column is selected, so no page data is loaded.
 */
export async function verifyDomain(domain: string): Promise<boolean> {
  const matches = await sql<{ id: string }[]>`SELECT id FROM status_pages WHERE custom_domain = ${domain}`;
  return Boolean(matches[0]);
}

/**
 * Lists the groups of a status page ordered by position, then name. That
 * order matters: redactGroupsAndMonitors derives the public group tokens
 * from each group's index in this list.
 */
export async function loadGroups(pageId: string): Promise<GroupRow[]> {
  const groups = await sql<GroupRow[]>`
    SELECT id, name, position FROM status_page_groups
    WHERE status_page_id = ${pageId}
    ORDER BY position ASC, name ASC
  `;
  return groups;
}

/**
 * Loads every monitor attached to a status page together with the derived
 * data the page renders for it: per-region and aggregate current state, the
 * uptime bar chart, the uptime percentage over that chart, the fastest-region
 * latency figures and the 30h hourly latency sparkline.
 *
 * The returned rows still carry the raw `group_id`; the callers in this file
 * pass them through redactGroupsAndMonitors before emitting them.
 *
 * @param pageId       status page whose monitors are loaded
 * @param barFrequency bucket size of the bar chart
 * @param barCount     number of bars; truncated to an integer and clamped to
 *                     1..180 (falls back to 90 when it is not a number)
 * @returns one MonitorRow per attached monitor, ordered by position then
 *          name; an empty array when the page has no monitors
 */
export async function loadMonitors(
  pageId: string,
  barFrequency: BucketType = "daily",
  barCount: number = 90,
): Promise<MonitorRow[]> {
  // Step 1: page → monitors with display overrides + group + position. Pull
  // m.enabled too so we can render disabled monitors as "Maintenance" on the
  // public page (the runner stops checking them when disabled, so their
  // region_states would otherwise drift to a stale "up" - visitors should
  // see this as planned downtime, not phantom uptime).
  // Deliberately do NOT select m.url - see the MonitorRow comment for why the
  // raw target URL must never reach the public payload.
  const monitorRows = await sql<any[]>`
    SELECT
      spm.monitor_id        AS id,
      COALESCE(spm.display_name, m.name) AS display_name,
      m.enabled             AS enabled,
      spm.group_id,
      spm.position
    FROM status_page_monitors spm
    JOIN monitors m ON m.id = spm.monitor_id
    WHERE spm.status_page_id = ${pageId}
    ORDER BY spm.position ASC, m.name ASC
  `;
  if (monitorRows.length === 0) return [];

  const ids = monitorRows.map((r) => r.id);

  // Step 2: per-region current state for these monitors.
  const stateRows = await sql<{ monitor_id: string; region: string; last_state: string | null; updated_at: string | Date | null }[]>`
    SELECT monitor_id, region, last_state, updated_at
    FROM monitor_region_state
    WHERE monitor_id = ANY(${sql.array(ids)}::text[])
  `;
  const stateByMonitor: Record<string, MonitorRow["region_states"]> = {};
  for (const s of stateRows) {
    if (!stateByMonitor[s.monitor_id]) stateByMonitor[s.monitor_id] = [];
    stateByMonitor[s.monitor_id]!.push({
      region: s.region,
      state: (s.last_state ?? "unknown") as MonitorRow["region_states"][number]["state"],
      // MonitorRow declares updated_at as a string; normalise a driver-level
      // Date the same way the incident loaders do so the payload shape does
      // not depend on how the driver parses timestamps.
      updated_at: s.updated_at instanceof Date ? s.updated_at.toISOString() : s.updated_at,
    });
  }

  // Step 3: ONE unified rollup query covering everything we need:
  //   - bar chart: bucket_type = barFrequency, last barCount buckets
  //   - multi-window uptime: hourly back 24h + daily back 90d
  //   - latency sparkline: hourly back 30h
  //
  // Union of all of those is "hourly back N hours OR daily back N days" with
  // N chosen to cover whichever consumer needs the widest window. The rows
  // are then partitioned by purpose entirely in JS - no second round-trip,
  // no duplicate FILTER aggregates inside Postgres.
  //
  // NOTE(review): apart from the 30h hourly sparkline, nothing in this
  // function reads rows outside the bar window (or daily rows at all when the
  // chart is hourly). Confirm the 90d span is still needed before narrowing
  // the query.
  const bucket: BucketType = barFrequency;
  // Coerce, drop any fraction and clamp. A NaN count would otherwise produce
  // a "NaN days" interval, and a fractional one would shift every generated
  // slot off the bucket boundaries so no rollup row could ever match.
  const requestedCount = Number(barCount);
  const count = Number.isNaN(requestedCount)
    ? 90
    : Math.max(1, Math.min(180, Math.trunc(requestedCount)));

  // Hourly span has to cover the latency sparkline (30h) AND the bar chart if
  // it's hourly (up to 180h). +2h slack so the truncated bucket boundary at
  // the start of the window is included even if we cross an hour during the
  // request.
  const hourlyBackHours = Math.max(30, bucket === "hourly" ? count : 0) + 2;
  // Daily span has to cover multi-window uptime (90d) AND the bar chart if
  // it's daily (up to 180d). +1d slack for the same reason.
  const dailyBackDays = Math.max(90, bucket === "daily" ? count : 0) + 1;
  const hourlyInterval = `${hourlyBackHours} hours`;
  const dailyInterval  = `${dailyBackDays} days`;

  let rollupRows = await sql<any[]>`
    SELECT monitor_id, region, bucket_type, bucket_start, total, up_count, avg_latency
    FROM monitor_uptime_rollup
    WHERE monitor_id = ANY(${sql.array(ids)}::text[])
      AND (
        (bucket_type = 'hourly' AND bucket_start > now() - ${hourlyInterval}::interval)
        OR
        (bucket_type = 'daily'  AND bucket_start > now() - ${dailyInterval}::interval)
      )
    ORDER BY monitor_id, bucket_type, region, bucket_start ASC
  `;

  // Fallback: if the rollup table has nothing for any of these monitors in
  // either bucket type (cold deploy, broken job), aggregate directly from
  // pings. Produces both bucket types via UNION ALL so downstream JS doesn't
  // need to know which path it came from. Bounded by the wider of the two
  // windows so it stays cheap. Once the rollup catches up this never fires.
  if (rollupRows.length === 0) {
    rollupRows = await sql<any[]>`
      (
        SELECT
          monitor_id,
          COALESCE(region, 'default') AS region,
          'hourly'::text AS bucket_type,
          date_trunc('hour', checked_at) AS bucket_start,
          count(*)::int AS total,
          count(*) FILTER (WHERE up)::int AS up_count,
          avg(latency_ms)::real AS avg_latency
        FROM pings
        WHERE monitor_id = ANY(${sql.array(ids)}::text[])
          AND checked_at > now() - ${hourlyInterval}::interval
        GROUP BY 1, 2, 4
      )
      UNION ALL
      (
        SELECT
          monitor_id,
          COALESCE(region, 'default') AS region,
          'daily'::text AS bucket_type,
          date_trunc('day', checked_at) AS bucket_start,
          count(*)::int AS total,
          count(*) FILTER (WHERE up)::int AS up_count,
          avg(latency_ms)::real AS avg_latency
        FROM pings
        WHERE monitor_id = ANY(${sql.array(ids)}::text[])
          AND checked_at > now() - ${dailyInterval}::interval
        GROUP BY 1, 2, 4
      )
    `;
  }

  const nowMs = Date.now();
  const ms30h = 30 * 3600_000;

  // Generate the full sequence of expected bucket timestamps so empty bars
  // render as "no data" instead of disappearing entirely. Truncate `now()`
  // to the unit so the slot boundaries line up with what the rollup writes.
  // Built before the aggregation pass so that pass can tell which rows fall
  // inside the rendered bar window.
  const bucketMs = bucket === "hourly" ? 3600_000 : 86_400_000;
  const truncate = (d: Date): Date => {
    const t = new Date(d);
    if (bucket === "hourly") t.setUTCMinutes(0, 0, 0);
    else                     t.setUTCHours(0, 0, 0, 0);
    return t;
  };
  const nowTrunc = truncate(new Date(nowMs)).getTime();
  const slotIsos: string[] = [];
  for (let i = count - 1; i >= 0; i--) {
    slotIsos.push(new Date(nowTrunc - i * bucketMs).toISOString());
  }
  const barSlots = new Set(slotIsos);

  // Single pass over the unified rows builds every index we need:
  //   barIndexed[mid][isoStart]               → cross-region {total, up} for bar coloring (only rows of barFrequency)
  //   barRegionLat[mid][region]               → weighted latency over the bar window for picking fastest region
  //   barRegionBucketLat[mid][region][iso]    → per-bucket latency in the fastest region (only rows of barFrequency)
  //   latByMonitor[mid][]                     → 30h hourly latency sparkline rows
  const barIndexed: Record<string, Record<string, { total: number; up: number }>> = {};
  const barRegionLat: Record<string, Record<string, { sum: number; n: number }>> = {};
  const barRegionBucketLat: Record<string, Record<string, Record<string, number>>> = {};

  const latByMonitor: Record<string, MonitorRow["latency_history"]> = {};

  for (const r of rollupRows) {
    const startDate = r.bucket_start instanceof Date ? r.bucket_start : new Date(r.bucket_start);
    const startIso  = startDate.toISOString();
    const startMs   = startDate.getTime();
    const total     = Number(r.total);
    const up        = Number(r.up_count);
    const avgLat    = r.avg_latency == null ? null : Number(r.avg_latency);
    const mid       = r.monitor_id;
    const bt: BucketType = r.bucket_type;

    // Bar chart accumulators - only rows matching the configured bar
    // frequency AND falling on one of the rendered slots. The query fetches a
    // wider span than the chart (slack + the 30h/90d minimums); without the
    // slot check those extra rows would leak into the fastest-region choice
    // and avg_latency even though they are never drawn and never counted in
    // uptime_pct.
    if (bt === bucket && barSlots.has(startIso)) {
      if (!barIndexed[mid]) barIndexed[mid] = {};
      const slot = barIndexed[mid]![startIso] ?? { total: 0, up: 0 };
      slot.total += total;
      slot.up    += up;
      barIndexed[mid]![startIso] = slot;

      if (avgLat != null && total > 0) {
        if (!barRegionLat[mid]) barRegionLat[mid] = {};
        const acc = barRegionLat[mid]![r.region] ?? { sum: 0, n: 0 };
        acc.sum += avgLat * total;
        acc.n   += total;
        barRegionLat[mid]![r.region] = acc;

        if (!barRegionBucketLat[mid]) barRegionBucketLat[mid] = {};
        if (!barRegionBucketLat[mid]![r.region]) barRegionBucketLat[mid]![r.region] = {};
        barRegionBucketLat[mid]![r.region]![startIso] = Math.round(avgLat);
      }
    }

    // 30h hourly latency sparkline.
    if (bt === "hourly" && nowMs - startMs < ms30h) {
      if (!latByMonitor[mid]) latByMonitor[mid] = [];
      latByMonitor[mid]!.push({
        region: r.region,
        latency_ms: avgLat == null ? null : Math.round(avgLat),
        ts: startIso,
      });
    }
  }

  // Sort the latency sparkline rows by ts ASC per monitor (the unified query
  // sorts by bucket_type then region then bucket_start, so the per-monitor
  // hourly subset is already ordered within a region but interleaved across
  // regions - this normalises it the same way the old separate query did).
  for (const mid of Object.keys(latByMonitor)) {
    latByMonitor[mid]!.sort((a, b) => a.ts.localeCompare(b.ts));
  }

  // Pick the fastest region per monitor over the bar window (lowest weighted
  // average latency). Per-bucket latency display + the per-monitor avg_latency
  // both come from the chosen region.
  const fastestRegionByMonitor: Record<string, string | null> = {};
  const fastestLatency: Record<string, number | null> = {};
  for (const id of ids) {
    let bestRegion: string | null = null;
    let bestAvg = Infinity;
    const regions = barRegionLat[id] ?? {};
    for (const [region, acc] of Object.entries(regions)) {
      if (acc.n === 0) continue;
      const avg = acc.sum / acc.n;
      if (avg < bestAvg) { bestAvg = avg; bestRegion = region; }
    }
    fastestRegionByMonitor[id] = bestRegion;
    fastestLatency[id] = bestRegion != null ? Math.round(bestAvg) : null;
  }

  // Expand each monitor's sparse slot map into the full, ordered bar series.
  const bucketsByMonitor: Record<string, MonitorRow["buckets"]> = {};
  for (const id of ids) {
    const slotMap = barIndexed[id] ?? {};
    const bestRegion = fastestRegionByMonitor[id];
    const fastestBuckets = bestRegion ? barRegionBucketLat[id]?.[bestRegion] ?? {} : {};
    bucketsByMonitor[id] = slotIsos.map((iso) => {
      const hit = slotMap[iso];
      const lat = fastestBuckets[iso] ?? null;
      return hit
        ? { start: iso, total: hit.total, up: hit.up, avg_latency: lat }
        : { start: iso, total: 0, up: 0, avg_latency: null };
    });
  }

  return monitorRows.map((m): MonitorRow => {
    const region_states = stateByMonitor[m.id] ?? [];
    // Any region down wins over any region up; no usable state → "unknown".
    let current_state: MonitorRow["current_state"] = "unknown";
    if (region_states.length > 0) {
      const anyDown = region_states.some((s) => s.state === "down");
      const anyUp   = region_states.some((s) => s.state === "up");
      current_state = anyDown ? "down" : anyUp ? "up" : "unknown";
    }
    // A disabled monitor is in operator-declared maintenance - runner has
    // stopped checking it. Override whatever the last region state was so the
    // public page reads "Maintenance" instead of a stale "Operational".
    if (m.enabled === false) current_state = "paused";
    const buckets = bucketsByMonitor[m.id] ?? [];
    let uptime_pct: number | null = null;
    if (buckets.length > 0) {
      const tot = buckets.reduce((a, b) => a + b.total, 0);
      const upT = buckets.reduce((a, b) => a + b.up, 0);
      // Full precision - the display layer truncates (not rounds) to 2 decimals
      // so any downtime, however small, never visually rounds up to 100%.
      uptime_pct = tot > 0 ? (100 * upT / tot) : null;
    }
    const avg_latency = fastestLatency[m.id] ?? null;
    return {
      id:           m.id,
      display_name: m.display_name,
      group_id:     m.group_id,
      position:     m.position,
      current_state,
      region_states,
      uptime_pct,
      buckets,
      avg_latency,
      latency_history: latByMonitor[m.id] ?? [],
    };
  });
}

/**
 * Loads the 50 most recently started incidents linked to a status page, each
 * with its full update timeline (newest first), and splits them in two:
 *   - active: pinned incidents that have no resolved_at
 *   - recent: everything else, in the same started_at DESC order
 */
export async function loadIncidents(pageId: string): Promise<{ active: IncidentSummary[]; recent: IncidentSummary[] }> {
  // Timestamps are emitted as ISO strings when the driver hands back a Date,
  // and stringified as-is otherwise.
  const asIso = (value: any): string => (value instanceof Date ? value.toISOString() : String(value));

  const incidentRows = await sql<any[]>`
    SELECT i.*
    FROM incidents i
    JOIN incident_status_pages isp ON isp.incident_id = i.id
    WHERE isp.status_page_id = ${pageId}
    ORDER BY i.started_at DESC
    LIMIT 50
  `;
  if (incidentRows.length === 0) return { active: [], recent: [] };

  const incidentIds = incidentRows.map((row) => row.id);
  // One query for every timeline; rows arrive newest first, so pushing them in
  // arrival order keeps each incident's timeline newest first as well.
  const updateRows = await sql<any[]>`
    SELECT id, incident_id, status, body_html, created_at
    FROM incident_updates
    WHERE incident_id = ANY(${sql.array(incidentIds)}::uuid[])
    ORDER BY created_at DESC
  `;
  const timelines = new Map<string, IncidentUpdateRow[]>();
  for (const row of updateRows) {
    let timeline = timelines.get(row.incident_id);
    if (!timeline) {
      timeline = [];
      timelines.set(row.incident_id, timeline);
    }
    timeline.push({
      id:         row.id,
      status:     row.status,
      body_html:  row.body_html,
      created_at: asIso(row.created_at),
    });
  }

  // Build each summary and route it to its bucket in a single pass.
  const active: IncidentSummary[] = [];
  const recent: IncidentSummary[] = [];
  for (const row of incidentRows) {
    const summary: IncidentSummary = {
      id:          row.id,
      title:       row.title,
      status:      row.status,
      severity:    row.severity,
      pinned:      row.pinned,
      started_at:  asIso(row.started_at),
      resolved_at: row.resolved_at ? asIso(row.resolved_at) : null,
      updates:     timelines.get(row.id) ?? [],
    };
    if (summary.pinned && !summary.resolved_at) active.push(summary);
    else                                        recent.push(summary);
  }
  return { active, recent };
}

/** Result of loadMonitorDetail: a single monitor plus the incidents attached to it. */
export interface MonitorDetailPayload {
  // Already passed through redactGroupsAndMonitors, so group_id is a public token.
  monitor:   MonitorRow;
  incidents: IncidentSummary[];        // recent incidents that touch this monitor
  // ISO-8601 timestamp taken when the payload was assembled.
  generated_at: string;
}

/**
 * Builds the detail payload for one monitor of a status page.
 *
 * @param slug      slug of the status page
 * @param monitorId monitor to load; must be attached to that page
 * @returns the redacted monitor plus up to 20 incidents attached to it, or
 *          null when the page does not exist or the monitor is not on it
 */
export async function loadMonitorDetail(slug: string, monitorId: string): Promise<MonitorDetailPayload | null> {
  const page = await loadStatusPage(slug);
  if (!page) return null;
  // Existence check only - confirm the monitor is actually attached to this
  // page, so a wrong slug/monitor combo returns null without firing any of
  // the bigger queries below.
  const [link] = await sql<any[]>`
    SELECT 1
    FROM status_page_monitors spm
    WHERE spm.status_page_id = ${page.id} AND spm.monitor_id = ${monitorId}
  `;
  if (!link) return null;

  const asIso = (value: any): string => (value instanceof Date ? value.toISOString() : String(value));

  // Incidents touching this monitor (any status), most recent 20, full timeline.
  // NOTE(review): this is scoped by account only, not by incident_status_pages
  // the way loadIncidents is - an incident attached to the monitor but never
  // published to this page would still be returned. Confirm that is intended.
  const loadMonitorIncidents = async (): Promise<IncidentSummary[]> => {
    const incidentRows = await sql<any[]>`
    SELECT i.*
    FROM incidents i
    JOIN incident_monitors im ON im.incident_id = i.id
    WHERE im.monitor_id = ${monitorId} AND i.account_id = ${page.account_id}
    ORDER BY i.started_at DESC
    LIMIT 20
  `;
    if (incidentRows.length === 0) return [];

    const ids = incidentRows.map((i) => i.id);
    const allUpdates = await sql<any[]>`
      SELECT id, incident_id, status, body_html, created_at
      FROM incident_updates
      WHERE incident_id = ANY(${sql.array(ids)}::uuid[])
      ORDER BY created_at DESC
    `;
    const updatesByIncident: Record<string, IncidentUpdateRow[]> = {};
    for (const u of allUpdates) {
      if (!updatesByIncident[u.incident_id]) updatesByIncident[u.incident_id] = [];
      updatesByIncident[u.incident_id]!.push({
        id:         u.id,
        status:     u.status,
        body_html:  u.body_html,
        created_at: asIso(u.created_at),
      });
    }
    return incidentRows.map((i) => ({
      id:          i.id,
      title:       i.title,
      status:      i.status,
      severity:    i.severity,
      pinned:      i.pinned,
      started_at:  asIso(i.started_at),
      resolved_at: i.resolved_at ? asIso(i.resolved_at) : null,
      updates:     updatesByIncident[i.id] ?? [],
    }));
  };

  // Reuse the bulk loader - keeps the bucket/state logic in one place. Note
  // that loadMonitors has no per-monitor filter, so this loads EVERY monitor
  // on the page and the one we want is picked out afterwards. We also need
  // the page's groups so we can redact the monitor's group_id (UUID → public
  // index-as-string token), matching what /:slug.json emits. The incident
  // lookup does not depend on either result, so all three run concurrently.
  const [allGroups, allMonitors, incidents] = await Promise.all([
    loadGroups(page.id),
    loadMonitors(page.id, page.bar_frequency, page.bar_count),
    loadMonitorIncidents(),
  ]);
  const { monitors } = redactGroupsAndMonitors(allGroups, allMonitors);
  const m = monitors.find((x) => x.id === monitorId);
  if (!m) return null;

  return { monitor: m, incidents, generated_at: new Date().toISOString() };
}

// The page shape we actually expose to anonymous visitors. Built by
// redactPageForPublic, which copies these fields from StatusPageRow and leaves
// out `id`, `account_id`, `password_hash`, `custom_domain` and `custom_css`.
// (Group and monitor IDs are handled separately by redactGroupsAndMonitors.)
export interface PublicPageView {
  slug:               string;
  title:              string;
  description:        string | null;
  theme:              "auto" | "light" | "dark";
  index_search:       boolean;
  show_powered_by:    boolean;
  show_response_time: boolean;
  bar_frequency:      BucketType;
  bar_count:          number;
  footer_text:        string | null;
  auto_refresh_s:     number;
  // True when the page row has a password hash; the hash itself is never emitted.
  has_password:       boolean;
}

/** A group as exposed publicly; monitors reference it through the same `id` token. */
export interface PublicGroupView {
  id:       string;   // re-keyed to the group's index in the ordered list (as a string), NOT the underlying UUID
  name:     string;
  position: number;
}

/** Everything loadPagePayload returns for one status page, already redacted. */
export interface PagePayload {
  page:      PublicPageView;
  groups:    PublicGroupView[];
  // group_id on each monitor holds the public group token, not the UUID.
  monitors:  MonitorRow[];
  incidents: { active: IncidentSummary[]; recent: IncidentSummary[] };
  // ISO-8601 timestamp taken when the payload was assembled.
  generated_at: string;
}

// Projects a status_pages row onto the fields that are safe on an
// unauthenticated payload. Deliberately left out:
//   - account_id     leaks the customer identifier across the platform
//   - id             internal status_page UUID, no consumer needs it
//   - password_hash  only its presence is reported, as `has_password`
// custom_domain and custom_css are not part of the public view either.
function redactPageForPublic(p: StatusPageRow): PublicPageView {
  const {
    slug,
    title,
    description,
    theme,
    index_search,
    show_powered_by,
    show_response_time,
    bar_frequency,
    bar_count,
    footer_text,
    auto_refresh_s,
    password_hash,
  } = p;
  return {
    slug,
    title,
    description,
    theme,
    index_search,
    show_powered_by,
    show_response_time,
    bar_frequency,
    bar_count,
    footer_text,
    auto_refresh_s,
    has_password: Boolean(password_hash),
  };
}

// Swaps every group's UUID for an opaque token: the group's index in the
// `groups` array, as a string ("0", "1", "2", ...). Monitors get the same
// token written into group_id so consumers can still join the two lists
// without ever seeing an internal UUID. A monitor whose group_id is empty or
// does not match any listed group ends up with group_id = null.
function redactGroupsAndMonitors(
  groups: GroupRow[],
  monitors: MonitorRow[],
): { groups: PublicGroupView[]; monitors: MonitorRow[] } {
  const tokenByGroupId = new Map<string, string>();
  const publicGroups: PublicGroupView[] = [];
  for (const [index, group] of groups.entries()) {
    const token = String(index);
    tokenByGroupId.set(group.id, token);
    publicGroups.push({ id: token, name: group.name, position: group.position });
  }

  const publicMonitors: MonitorRow[] = [];
  for (const monitor of monitors) {
    let token: string | null = null;
    if (monitor.group_id) {
      token = tokenByGroupId.get(monitor.group_id) ?? null;
    }
    publicMonitors.push({ ...monitor, group_id: token });
  }

  return { groups: publicGroups, monitors: publicMonitors };
}

/**
 * Assembles the complete public payload for the status page with the given
 * slug: the redacted page settings, its groups and monitors (with internal
 * group IDs replaced by public tokens) and its incidents.
 *
 * @returns the payload, or null when no page has that slug
 */
export async function loadPagePayload(slug: string): Promise<PagePayload | null> {
  const page = await loadStatusPage(slug);
  if (!page) return null;

  // The three loaders only need the page id, so they run concurrently.
  const pageId = page.id;
  const loaded = await Promise.all([
    loadGroups(pageId),
    loadMonitors(pageId, page.bar_frequency, page.bar_count),
    loadIncidents(pageId),
  ]);
  const redacted = redactGroupsAndMonitors(loaded[0], loaded[1]);

  return {
    page:         redactPageForPublic(page),
    groups:       redacted.groups,
    monitors:     redacted.monitors,
    incidents:    loaded[2],
    generated_at: new Date().toISOString(),
  };
}