fix: spawn cert check as independent task, never blocks main request timeout

This commit is contained in:
M1 2026-03-18 12:35:58 +04:00
parent dbbc9c00cc
commit 68093131fa
1 changed file with 42 additions and 31 deletions

View File

@ -93,42 +93,53 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
let is_https = monitor.url.starts_with("https://");
let url_clone = monitor.url.clone();
// Run the HTTP request and cert check concurrently, both under the same timeout.
// This prevents a hanging TCP connect in the cert check from blocking the whole check.
// Wrap request + body read in a hard timeout.
// Cert check runs as a background task with a shorter cap so it never blocks
// the main check — if the cert TLS connect hangs (e.g. site totally down),
// we still report the result from the HTTP side within the configured timeout.
let cert_handle = if is_https {
Some(tokio::spawn(tokio::time::timeout(
std::time::Duration::from_secs(10),
async move { check_cert_expiry(&url_clone).await },
)))
} else {
None
};
let timed = tokio::time::timeout(timeout, async {
let cert_future = async {
if is_https {
check_cert_expiry(&url_clone).await.ok().flatten()
let resp = req.send().await?;
let status = resp.status();
let headers: HashMap<String, String> = resp.headers().iter()
.filter_map(|(k, v)| Some((k.to_string(), v.to_str().ok()?.to_string())))
.collect();
const MAX_BODY_BYTES: usize = 10 * 1024 * 1024;
let body = {
let content_len = resp.content_length().unwrap_or(0) as usize;
if content_len > MAX_BODY_BYTES {
format!("[body truncated: Content-Length {} exceeds 10MB limit]", content_len)
} else {
None
let bytes = resp.bytes().await?;
let truncated = &bytes[..bytes.len().min(MAX_BODY_BYTES)];
String::from_utf8_lossy(truncated).into_owned()
}
};
let req_future = async {
let resp = req.send().await?;
let status = resp.status();
let headers: HashMap<String, String> = resp.headers().iter()
.filter_map(|(k, v)| Some((k.to_string(), v.to_str().ok()?.to_string())))
.collect();
const MAX_BODY_BYTES: usize = 10 * 1024 * 1024;
let body = {
let content_len = resp.content_length().unwrap_or(0) as usize;
if content_len > MAX_BODY_BYTES {
format!("[body truncated: Content-Length {} exceeds 10MB limit]", content_len)
} else {
let bytes = resp.bytes().await?;
let truncated = &bytes[..bytes.len().min(MAX_BODY_BYTES)];
String::from_utf8_lossy(truncated).into_owned()
}
};
Ok::<_, reqwest::Error>((status, headers, body))
};
let (cert_result, req_result) = tokio::join!(cert_future, req_future);
req_result.map(|(status, headers, body)| (status, headers, body, cert_result))
Ok::<_, reqwest::Error>((status, headers, body))
}).await;
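// Collect cert result — wait up to 2s after the main request finishes, then
// stop waiting and report no cert info. Note: timing out here only drops the
// JoinHandle, which detaches the task rather than aborting it; the task still
// ends on its own 10s cap. This way a fast site still gets cert info, but a
// hung cert check never blocks the ping result.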
let cert_expiry_days = match cert_handle {
Some(handle) => {
match tokio::time::timeout(std::time::Duration::from_secs(2), handle).await {
Ok(Ok(Ok(Ok(days)))) => days,
_ => None,
}
},
None => None,
};
let latency_ms = start.elapsed().as_millis() as u64;
// Flatten timeout + reqwest errors into a single result
@ -150,7 +161,7 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op
cert_expiry_days: None,
meta: None,
},
Ok((status_raw, headers, body, cert_expiry_days)) => {
Ok((status_raw, headers, body)) => {
let status = status_raw.as_u16();
// Evaluate query if present