From 68093131fa21f9928434fea301f1e1441c31574d Mon Sep 17 00:00:00 2001
From: M1
Date: Wed, 18 Mar 2026 12:35:58 +0400
Subject: [PATCH] fix: spawn cert check as independent task, never blocks main request timeout

Spawn the TLS certificate check as its own task instead of joining it with
the HTTP request, so a hanging cert connect can no longer consume the
request timeout.

Latency is captured before the cert result is collected, so the grace
period spent waiting on the cert task is not reported as request latency.
If the cert task has not finished within that grace period it is aborted
explicitly; dropping a JoinHandle only detaches the task.
---
 apps/monitor/src/runner.rs | 79 +++++++++++++++++++++++---------------
 1 file changed, 48 insertions(+), 31 deletions(-)

diff --git a/apps/monitor/src/runner.rs b/apps/monitor/src/runner.rs
index 8d35ec0..6f671e9 100644
--- a/apps/monitor/src/runner.rs
+++ b/apps/monitor/src/runner.rs
@@ -93,42 +93,59 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
     let is_https = monitor.url.starts_with("https://");
     let url_clone = monitor.url.clone();
 
-    // Run the HTTP request and cert check concurrently, both under the same timeout.
-    // This prevents a hanging TCP connect in the cert check from blocking the whole check.
+    // Wrap request + body read in a hard timeout.
+    // Cert check runs as a background task with a shorter cap so it never blocks
+    // the main check — if the cert TLS connect hangs (e.g. site totally down),
+    // we still report the result from the HTTP side within the configured timeout.
+    let cert_handle = if is_https {
+        Some(tokio::spawn(tokio::time::timeout(
+            std::time::Duration::from_secs(10),
+            async move { check_cert_expiry(&url_clone).await },
+        )))
+    } else {
+        None
+    };
+
     let timed = tokio::time::timeout(timeout, async {
-        let cert_future = async {
-            if is_https {
-                check_cert_expiry(&url_clone).await.ok().flatten()
+        let resp = req.send().await?;
+        let status = resp.status();
+        let headers: HashMap<String, String> = resp.headers().iter()
+            .filter_map(|(k, v)| Some((k.to_string(), v.to_str().ok()?.to_string())))
+            .collect();
+
+        const MAX_BODY_BYTES: usize = 10 * 1024 * 1024;
+        let body = {
+            let content_len = resp.content_length().unwrap_or(0) as usize;
+            if content_len > MAX_BODY_BYTES {
+                format!("[body truncated: Content-Length {} exceeds 10MB limit]", content_len)
             } else {
-                None
+                let bytes = resp.bytes().await?;
+                let truncated = &bytes[..bytes.len().min(MAX_BODY_BYTES)];
+                String::from_utf8_lossy(truncated).into_owned()
             }
         };
-
-        let req_future = async {
-            let resp = req.send().await?;
-            let status = resp.status();
-            let headers: HashMap<String, String> = resp.headers().iter()
-                .filter_map(|(k, v)| Some((k.to_string(), v.to_str().ok()?.to_string())))
-                .collect();
-
-            const MAX_BODY_BYTES: usize = 10 * 1024 * 1024;
-            let body = {
-                let content_len = resp.content_length().unwrap_or(0) as usize;
-                if content_len > MAX_BODY_BYTES {
-                    format!("[body truncated: Content-Length {} exceeds 10MB limit]", content_len)
-                } else {
-                    let bytes = resp.bytes().await?;
-                    let truncated = &bytes[..bytes.len().min(MAX_BODY_BYTES)];
-                    String::from_utf8_lossy(truncated).into_owned()
-                }
-            };
-            Ok::<_, reqwest::Error>((status, headers, body))
-        };
-
-        let (cert_result, req_result) = tokio::join!(cert_future, req_future);
-        req_result.map(|(status, headers, body)| (status, headers, body, cert_result))
+        Ok::<_, reqwest::Error>((status, headers, body))
     }).await;
 
     let latency_ms = start.elapsed().as_millis() as u64;
 
+    // Collect the cert result only after latency has been captured, so waiting
+    // on the cert task never inflates the reported request latency.
+    // The task gets up to 2s after the main request finishes; a fast site still
+    // gets cert info. On expiry the task is aborted explicitly, because dropping
+    // a JoinHandle only detaches the task and would leave it running.
+    let cert_expiry_days = match cert_handle {
+        Some(mut handle) => {
+            match tokio::time::timeout(std::time::Duration::from_secs(2), &mut handle).await {
+                Ok(Ok(Ok(Ok(days)))) => days,
+                Ok(_) => None,
+                Err(_) => {
+                    handle.abort();
+                    None
+                }
+            }
+        }
+        None => None,
+    };
+
     // Flatten timeout + reqwest errors into a single result
@@ -150,7 +167,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
             cert_expiry_days: None,
             meta: None,
         },
-        Ok((status_raw, headers, body, cert_expiry_days)) => {
+        Ok((status_raw, headers, body)) => {
             let status = status_raw.as_u16();
 
             // Evaluate query if present