From 289ec8e0381448f3c626d9c42c79d093890e90b4 Mon Sep 17 00:00:00 2001 From: M1 Date: Wed, 18 Mar 2026 13:05:43 +0400 Subject: [PATCH] fix: hard task-level timeout as failsafe so in-flight lock always clears --- apps/monitor/src/runner.rs | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/apps/monitor/src/runner.rs b/apps/monitor/src/runner.rs index ee76a9f..96d86b2 100644 --- a/apps/monitor/src/runner.rs +++ b/apps/monitor/src/runner.rs @@ -44,7 +44,23 @@ pub async fn fetch_and_run( let token = token.to_string(); let in_flight = in_flight.clone(); tokio::spawn(async move { - let result = run_check(&client, &monitor, monitor.scheduled_at.clone()).await; + let timeout_ms = monitor.timeout_ms.unwrap_or(30000); + // Hard deadline: timeout + 5s buffer, so hung checks always resolve + let deadline = std::time::Duration::from_millis(timeout_ms + 5000); + let result = match tokio::time::timeout(deadline, run_check(&client, &monitor, monitor.scheduled_at.clone())).await { + Ok(r) => r, + Err(_) => PingResult { + monitor_id: monitor.id.clone(), + scheduled_at: monitor.scheduled_at.clone(), + jitter_ms: None, + status_code: None, + latency_ms: Some(timeout_ms as u64), + up: false, + error: Some(format!("timed out after {}ms", timeout_ms)), + cert_expiry_days: None, + meta: None, + }, + }; // Remove from in-flight before posting so a fast next cycle can pick it up in_flight.lock().await.remove(&monitor.id); if let Err(e) = post_result(&client, &coordinator_url, &token, result).await {