feat: multi-region monitor support — region selector in UI, region flag on pings
This commit is contained in:
parent
52f7f8102b
commit
93db31db3b
|
|
@ -49,6 +49,8 @@ export async function migrate() {
|
|||
// Migrations for existing deployments
|
||||
await sql`ALTER TABLE pings ADD COLUMN IF NOT EXISTS scheduled_at TIMESTAMPTZ`;
|
||||
await sql`ALTER TABLE pings ADD COLUMN IF NOT EXISTS jitter_ms INTEGER`;
|
||||
await sql`ALTER TABLE monitors ADD COLUMN IF NOT EXISTS regions TEXT[] NOT NULL DEFAULT '{}'`;
|
||||
await sql`ALTER TABLE pings ADD COLUMN IF NOT EXISTS region TEXT`;
|
||||
|
||||
await sql`CREATE INDEX IF NOT EXISTS idx_pings_monitor ON pings(monitor_id, checked_at DESC)`;
|
||||
await sql`CREATE INDEX IF NOT EXISTS idx_pings_checked_at ON pings(checked_at)`;
|
||||
|
|
|
|||
|
|
@ -33,9 +33,10 @@ export const internal = new Elysia({ prefix: "/internal", detail: { hide: true }
|
|||
|
||||
// Returns monitors that are due for a check.
|
||||
// scheduled_at = last_checked_at + interval_s (ideal fire time), so jitter = actual_start - scheduled_at
|
||||
.get("/due", async () => {
|
||||
.get("/due", async ({ query }) => {
|
||||
const region = query.region as string | undefined;
|
||||
const monitors = await sql`
|
||||
SELECT m.id, m.url, m.method, m.request_headers, m.request_body, m.timeout_ms, m.interval_s, m.query,
|
||||
SELECT m.id, m.url, m.method, m.request_headers, m.request_body, m.timeout_ms, m.interval_s, m.query, m.regions,
|
||||
CASE
|
||||
WHEN last.checked_at IS NULL THEN now()
|
||||
ELSE last.checked_at + (m.interval_s || ' seconds')::interval
|
||||
|
|
@ -49,6 +50,11 @@ export const internal = new Elysia({ prefix: "/internal", detail: { hide: true }
|
|||
WHERE m.enabled = true
|
||||
AND (last.checked_at IS NULL
|
||||
OR last.checked_at < now() - (m.interval_s || ' seconds')::interval)
|
||||
AND (
|
||||
array_length(m.regions, 1) IS NULL
|
||||
OR m.regions = '{}'
|
||||
OR ${region ? sql`${region} = ANY(m.regions)` : sql`true`}
|
||||
)
|
||||
`;
|
||||
return monitors;
|
||||
})
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ const MonitorBody = t.Object({
|
|||
timeout_ms: t.Optional(t.Number({ minimum: 1000, maximum: 60000, default: 30000, description: "Request timeout in ms" })),
|
||||
interval_s: t.Optional(t.Number({ minimum: 1, default: 60, description: "Check interval in seconds" })),
|
||||
query: t.Optional(t.Any({ description: "PingQL query — filter conditions for up/down" })),
|
||||
regions: t.Optional(t.Array(t.String(), { description: "Regions to run checks from. Empty array = all regions." })),
|
||||
});
|
||||
|
||||
export const monitors = new Elysia({ prefix: "/monitors" })
|
||||
|
|
@ -28,8 +29,9 @@ export const monitors = new Elysia({ prefix: "/monitors" })
|
|||
const ssrfError = await validateMonitorUrl(body.url);
|
||||
if (ssrfError) return error(400, { error: ssrfError });
|
||||
|
||||
const regions = body.regions ?? [];
|
||||
const [monitor] = await sql`
|
||||
INSERT INTO monitors (account_id, name, url, method, request_headers, request_body, timeout_ms, interval_s, query)
|
||||
INSERT INTO monitors (account_id, name, url, method, request_headers, request_body, timeout_ms, interval_s, query, regions)
|
||||
VALUES (
|
||||
${accountId}, ${body.name}, ${body.url},
|
||||
${(body.method ?? 'GET').toUpperCase()},
|
||||
|
|
@ -37,7 +39,8 @@ export const monitors = new Elysia({ prefix: "/monitors" })
|
|||
${body.request_body ?? null},
|
||||
${body.timeout_ms ?? 30000},
|
||||
${body.interval_s ?? 60},
|
||||
${body.query ? sql.json(body.query) : null}
|
||||
${body.query ? sql.json(body.query) : null},
|
||||
${sql.array(regions)}
|
||||
)
|
||||
RETURNING *
|
||||
`;
|
||||
|
|
@ -75,7 +78,8 @@ export const monitors = new Elysia({ prefix: "/monitors" })
|
|||
request_body = COALESCE(${body.request_body ?? null}, request_body),
|
||||
timeout_ms = COALESCE(${body.timeout_ms ?? null}, timeout_ms),
|
||||
interval_s = COALESCE(${body.interval_s ?? null}, interval_s),
|
||||
query = COALESCE(${body.query ? sql.json(body.query) : null}, query)
|
||||
query = COALESCE(${body.query ? sql.json(body.query) : null}, query),
|
||||
regions = COALESCE(${body.regions ? sql.array(body.regions) : null}, regions)
|
||||
WHERE id = ${params.id} AND account_id = ${accountId}
|
||||
RETURNING *
|
||||
`;
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ export const ingest = new Elysia()
|
|||
const jitterMs = body.jitter_ms ?? null;
|
||||
|
||||
const [ping] = await sql`
|
||||
INSERT INTO pings (monitor_id, scheduled_at, jitter_ms, status_code, latency_ms, up, error, meta)
|
||||
INSERT INTO pings (monitor_id, scheduled_at, jitter_ms, status_code, latency_ms, up, error, meta, region)
|
||||
VALUES (
|
||||
${body.monitor_id},
|
||||
${scheduledAt},
|
||||
|
|
@ -82,7 +82,8 @@ export const ingest = new Elysia()
|
|||
${body.latency_ms ?? null},
|
||||
${body.up},
|
||||
${body.error ?? null},
|
||||
${Object.keys(meta).length > 0 ? sql.json(meta) : null}
|
||||
${Object.keys(meta).length > 0 ? sql.json(meta) : null},
|
||||
${body.region ?? null}
|
||||
)
|
||||
RETURNING *
|
||||
`;
|
||||
|
|
@ -103,6 +104,7 @@ export const ingest = new Elysia()
|
|||
error: t.Optional(t.Nullable(t.String())),
|
||||
cert_expiry_days: t.Optional(t.Nullable(t.Number())),
|
||||
meta: t.Optional(t.Any()),
|
||||
region: t.Optional(t.Nullable(t.String())),
|
||||
}),
|
||||
detail: { hide: true },
|
||||
})
|
||||
|
|
|
|||
|
|
@ -26,8 +26,9 @@ async fn main() -> Result<()> {
|
|||
.unwrap_or_else(|_| "http://localhost:3000".into());
|
||||
let monitor_token = env::var("MONITOR_TOKEN")
|
||||
.expect("MONITOR_TOKEN must be set");
|
||||
let region = env::var("REGION").unwrap_or_default();
|
||||
|
||||
info!("PingQL monitor starting, coordinator: {coordinator_url}");
|
||||
info!("PingQL monitor starting, coordinator: {coordinator_url}, region: {}", if region.is_empty() { "all" } else { &region });
|
||||
|
||||
let client = reqwest::Client::builder()
|
||||
.user_agent("PingQL-Monitor/0.1")
|
||||
|
|
@ -37,7 +38,7 @@ async fn main() -> Result<()> {
|
|||
let in_flight: Arc<Mutex<HashSet<String>>> = Arc::new(Mutex::new(HashSet::new()));
|
||||
|
||||
loop {
|
||||
match runner::fetch_and_run(&client, &coordinator_url, &monitor_token, &in_flight).await {
|
||||
match runner::fetch_and_run(&client, &coordinator_url, &monitor_token, &region, &in_flight).await {
|
||||
Ok(n) => { if n > 0 { info!("Spawned {n} checks"); } },
|
||||
Err(e) => error!("Check cycle failed: {e}"),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,11 +13,17 @@ pub async fn fetch_and_run(
|
|||
client: &reqwest::Client,
|
||||
coordinator_url: &str,
|
||||
token: &str,
|
||||
region: &str,
|
||||
in_flight: &Arc<Mutex<HashSet<String>>>,
|
||||
) -> Result<usize> {
|
||||
// Fetch due monitors
|
||||
// Fetch due monitors for this region
|
||||
let url = if region.is_empty() {
|
||||
format!("{coordinator_url}/internal/due")
|
||||
} else {
|
||||
format!("{coordinator_url}/internal/due?region={}", region)
|
||||
};
|
||||
let monitors: Vec<Monitor> = client
|
||||
.get(format!("{coordinator_url}/internal/due"))
|
||||
.get(&url)
|
||||
.header("x-monitor-token", token)
|
||||
.send()
|
||||
.await?
|
||||
|
|
@ -42,12 +48,13 @@ pub async fn fetch_and_run(
|
|||
let client = client.clone();
|
||||
let coordinator_url = coordinator_url.to_string();
|
||||
let token = token.to_string();
|
||||
let region_owned = region.to_string();
|
||||
let in_flight = in_flight.clone();
|
||||
tokio::spawn(async move {
|
||||
let timeout_ms = monitor.timeout_ms.unwrap_or(30000);
|
||||
// Hard deadline: timeout + 5s buffer, so hung checks always resolve
|
||||
let deadline = std::time::Duration::from_millis(timeout_ms + 5000);
|
||||
let result = match tokio::time::timeout(deadline, run_check(&client, &monitor, monitor.scheduled_at.clone())).await {
|
||||
let result = match tokio::time::timeout(deadline, run_check(&client, &monitor, monitor.scheduled_at.clone(), &region_owned)).await {
|
||||
Ok(r) => r,
|
||||
Err(_) => PingResult {
|
||||
monitor_id: monitor.id.clone(),
|
||||
|
|
@ -59,6 +66,7 @@ pub async fn fetch_and_run(
|
|||
error: Some(format!("timed out after {}ms", timeout_ms)),
|
||||
cert_expiry_days: None,
|
||||
meta: None,
|
||||
region: if region_owned.is_empty() { None } else { Some(region_owned.clone()) },
|
||||
},
|
||||
};
|
||||
// Post result first, then clear in-flight — this prevents the next
|
||||
|
|
@ -73,7 +81,7 @@ pub async fn fetch_and_run(
|
|||
Ok(spawned)
|
||||
}
|
||||
|
||||
async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Option<String>) -> PingResult {
|
||||
async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Option<String>, region: &str) -> PingResult {
|
||||
// Compute jitter: how late we actually started vs when we were scheduled
|
||||
let jitter_ms: Option<i64> = scheduled_at.as_deref().and_then(|s| {
|
||||
let scheduled = chrono::DateTime::parse_from_rfc3339(s).ok()?;
|
||||
|
|
@ -126,6 +134,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
|
|||
error: Some(e.clone()),
|
||||
cert_expiry_days: None,
|
||||
meta: None,
|
||||
region: if region.is_empty() { None } else { Some(region.to_string()) },
|
||||
}
|
||||
},
|
||||
Ok((status_code, headers, body)) => {
|
||||
|
|
@ -185,6 +194,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
|
|||
error: query_error,
|
||||
cert_expiry_days,
|
||||
meta: Some(meta),
|
||||
region: if region.is_empty() { None } else { Some(region.to_string()) },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ pub struct Monitor {
|
|||
pub interval_s: i64,
|
||||
pub query: Option<Value>,
|
||||
pub scheduled_at: Option<String>,
|
||||
pub regions: Option<Vec<String>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize)]
|
||||
|
|
@ -26,4 +27,5 @@ pub struct PingResult {
|
|||
pub error: Option<String>,
|
||||
pub cert_expiry_days: Option<i64>,
|
||||
pub meta: Option<Value>,
|
||||
pub region: Option<String>,
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because one or more lines are too long
|
|
@ -81,11 +81,20 @@
|
|||
</div>
|
||||
<div class="overflow-x-auto">
|
||||
<table class="w-full text-sm">
|
||||
<%
|
||||
const regionFlag = {
|
||||
'eu-central': '🇩🇪',
|
||||
'us-east': '🇺🇸',
|
||||
'us-west': '🇺🇸',
|
||||
'ap-southeast': '🇸🇬',
|
||||
};
|
||||
%>
|
||||
<thead>
|
||||
<tr class="text-gray-500 text-xs">
|
||||
<th class="text-left px-4 py-2 font-medium">Status</th>
|
||||
<th class="text-left px-4 py-2 font-medium">Code</th>
|
||||
<th class="text-left px-4 py-2 font-medium">Latency</th>
|
||||
<th class="text-left px-4 py-2 font-medium">Region</th>
|
||||
<th class="text-left px-4 py-2 font-medium">Time / Jitter</th>
|
||||
<th class="text-left px-4 py-2 font-medium">Error</th>
|
||||
</tr>
|
||||
|
|
@ -96,6 +105,7 @@
|
|||
<td class="px-4 py-2"><%~ c.up ? '<span class="text-green-400">Up</span>' : '<span class="text-red-400">Down</span>' %></td>
|
||||
<td class="px-4 py-2 text-gray-300"><%= c.status_code != null ? c.status_code : '—' %></td>
|
||||
<td class="px-4 py-2 text-gray-300"><%= c.latency_ms != null ? c.latency_ms + 'ms' : '—' %></td>
|
||||
<td class="px-4 py-2 text-gray-500 text-sm" title="<%= c.region || '' %>"><%= c.region ? (regionFlag[c.region] || '🌐') + ' ' + c.region : '—' %></td>
|
||||
<td class="px-4 py-2 text-gray-500"><%~ it.timeAgoSSR(c.checked_at) %><% if (c.jitter_ms != null) { %> <span class="text-gray-600 text-xs">(+<%= c.jitter_ms %>ms)</span><% } %></td>
|
||||
<td class="px-4 py-2 text-red-400/70 text-xs truncate max-w-[200px]"><%= c.error ? c.error : '' %></td>
|
||||
</tr>
|
||||
|
|
@ -313,10 +323,13 @@
|
|||
if (tbody) {
|
||||
const tr = document.createElement('tr');
|
||||
tr.className = 'hover:bg-gray-800/50';
|
||||
const regionFlags = {'eu-central':'🇩🇪','us-east':'🇺🇸','us-west':'🇺🇸','ap-southeast':'🇸🇬'};
|
||||
const regionDisplay = ping.region ? `${regionFlags[ping.region] || '🌐'} ${ping.region}` : '—';
|
||||
tr.innerHTML = `
|
||||
<td class="px-4 py-2">${ping.up ? '<span class="text-green-400">Up</span>' : '<span class="text-red-400">Down</span>'}</td>
|
||||
<td class="px-4 py-2 text-gray-300">${ping.status_code ?? '—'}</td>
|
||||
<td class="px-4 py-2 text-gray-300">${ping.latency_ms != null ? ping.latency_ms + 'ms' : '—'}</td>
|
||||
<td class="px-4 py-2 text-gray-500 text-sm" title="${ping.region || ''}">${regionDisplay}</td>
|
||||
<td class="px-4 py-2 text-gray-500">${timeAgo(ping.checked_at)}${ping.jitter_ms != null ? ` <span class="text-gray-600 text-xs">(+${ping.jitter_ms}ms)</span>` : ''}</td>
|
||||
<td class="px-4 py-2 text-red-400/70 text-xs truncate max-w-[200px]">${ping.error ? escapeHtml(ping.error) : ''}</td>
|
||||
`;
|
||||
|
|
|
|||
|
|
@ -78,6 +78,24 @@
|
|||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label class="block text-sm text-gray-400 mb-1.5">Regions <span class="text-gray-600">(optional — leave all unselected to use all regions)</span></label>
|
||||
<div class="flex flex-wrap gap-2" id="region-list">
|
||||
<label class="region-option flex items-center gap-2 bg-gray-900 border border-gray-800 hover:border-gray-600 rounded-lg px-3 py-2 cursor-pointer transition-colors">
|
||||
<input type="checkbox" value="eu-central" class="region-check accent-blue-500"> <span class="text-sm text-gray-300">🇩🇪 EU Central</span>
|
||||
</label>
|
||||
<label class="region-option flex items-center gap-2 bg-gray-900 border border-gray-800 hover:border-gray-600 rounded-lg px-3 py-2 cursor-pointer transition-colors">
|
||||
<input type="checkbox" value="us-east" class="region-check accent-blue-500"> <span class="text-sm text-gray-300">🇺🇸 US East</span>
|
||||
</label>
|
||||
<label class="region-option flex items-center gap-2 bg-gray-900 border border-gray-800 hover:border-gray-600 rounded-lg px-3 py-2 cursor-pointer transition-colors">
|
||||
<input type="checkbox" value="us-west" class="region-check accent-blue-500"> <span class="text-sm text-gray-300">🇺🇸 US West</span>
|
||||
</label>
|
||||
<label class="region-option flex items-center gap-2 bg-gray-900 border border-gray-800 hover:border-gray-600 rounded-lg px-3 py-2 cursor-pointer transition-colors">
|
||||
<input type="checkbox" value="ap-southeast" class="region-check accent-blue-500"> <span class="text-sm text-gray-300">🇸🇬 AP Southeast</span>
|
||||
</label>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div>
|
||||
<label class="block text-sm text-gray-400 mb-1.5">Query Conditions <span class="text-gray-600">(optional)</span></label>
|
||||
<p class="text-xs text-gray-600 mb-3">Define when this monitor should be considered "up". Defaults to status < 400.</p>
|
||||
|
|
@ -145,6 +163,8 @@
|
|||
timeout_ms: Number(document.getElementById('timeout').value),
|
||||
};
|
||||
if (Object.keys(headers).length) body.request_headers = headers;
|
||||
const regions = [...document.querySelectorAll('.region-check:checked')].map(el => el.value);
|
||||
if (regions.length) body.regions = regions;
|
||||
const rb = document.getElementById('request-body').value.trim();
|
||||
if (rb) body.request_body = rb;
|
||||
if (currentQuery) body.query = currentQuery;
|
||||
|
|
|
|||
|
|
@ -0,0 +1,84 @@
|
|||
#!/bin/bash
|
||||
# PingQL Deploy Script
|
||||
# Usage: ./deploy.sh [web|api|monitor|db|all] [...]
|
||||
# Example: ./deploy.sh web api
|
||||
# Example: ./deploy.sh all
|
||||
|
||||
set -e
|
||||
|
||||
SSH="ssh -o StrictHostKeyChecking=no -i ~/.ssh/id_ed25519"
|
||||
|
||||
DB_HOST="root@142.132.190.209"
|
||||
API_HOST="root@88.99.123.102"
|
||||
WEB_HOST="root@78.47.43.36"
|
||||
MONITOR_HOSTS=("root@5.161.76.127" "root@5.78.178.12" "root@5.223.51.251" "root@49.13.118.44")
|
||||
|
||||
deploy_db() {
|
||||
echo "[db] Restarting PostgreSQL on database-eu-central..."
|
||||
$SSH $DB_HOST "systemctl restart postgresql && echo 'PostgreSQL restarted'"
|
||||
}
|
||||
|
||||
deploy_api() {
|
||||
echo "[api] Deploying to api-eu-central..."
|
||||
$SSH $API_HOST bash << 'REMOTE'
|
||||
cd /opt/pingql
|
||||
git pull
|
||||
cd apps/api
|
||||
/root/.bun/bin/bun install
|
||||
systemctl restart pingql-api
|
||||
systemctl restart caddy
|
||||
echo "API deployed and restarted"
|
||||
REMOTE
|
||||
}
|
||||
|
||||
deploy_web() {
|
||||
echo "[web] Deploying to web-eu-central..."
|
||||
$SSH $WEB_HOST bash << 'REMOTE'
|
||||
cd /opt/pingql
|
||||
git pull
|
||||
cd apps/web
|
||||
/root/.bun/bin/bun install
|
||||
/root/.bun/bin/bun run css
|
||||
systemctl restart pingql-web
|
||||
systemctl restart caddy
|
||||
echo "Web deployed and restarted"
|
||||
REMOTE
|
||||
}
|
||||
|
||||
deploy_monitor() {
|
||||
echo "[monitor] Deploying to all 4 monitors in parallel..."
|
||||
for host in "${MONITOR_HOSTS[@]}"; do
|
||||
(
|
||||
echo "[monitor] Starting deploy on $host..."
|
||||
$SSH $host bash << 'REMOTE'
|
||||
cd /opt/pingql
|
||||
git pull
|
||||
cd apps/monitor
|
||||
/root/.cargo/bin/cargo build --release
|
||||
systemctl restart pingql-monitor
|
||||
echo "Monitor deployed and restarted on $(hostname)"
|
||||
REMOTE
|
||||
) &
|
||||
done
|
||||
wait
|
||||
echo "[monitor] All monitors deployed"
|
||||
}
|
||||
|
||||
# Parse args
|
||||
if [ $# -eq 0 ]; then
|
||||
echo "Usage: $0 [web|api|monitor|db|all] [...]"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
for arg in "$@"; do
|
||||
case "$arg" in
|
||||
db) deploy_db ;;
|
||||
api) deploy_api ;;
|
||||
web) deploy_web ;;
|
||||
monitor) deploy_monitor ;;
|
||||
all) deploy_db; deploy_api; deploy_web; deploy_monitor ;;
|
||||
*) echo "Unknown target: $arg (valid: web, api, monitor, db, all)"; exit 1 ;;
|
||||
esac
|
||||
done
|
||||
|
||||
echo "Deploy complete."
|
||||
Loading…
Reference in New Issue