From 5b0bce65c6969ac40050fb8dfd47d87d7a6ebc61 Mon Sep 17 00:00:00 2001 From: M1 Date: Wed, 18 Mar 2026 12:43:27 +0400 Subject: [PATCH] fix: pre-flight TCP connect check with hard tokio timeout before reqwest attempt --- apps/monitor/src/runner.rs | 60 +++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 10 deletions(-) diff --git a/apps/monitor/src/runner.rs b/apps/monitor/src/runner.rs index 4aacfa9..8f8addb 100644 --- a/apps/monitor/src/runner.rs +++ b/apps/monitor/src/runner.rs @@ -70,20 +70,60 @@ async fn run_check(client: &reqwest::Client, monitor: &Monitor, scheduled_at: Op let method = monitor.method.as_deref().unwrap_or("GET").to_uppercase(); let timeout = std::time::Duration::from_millis(monitor.timeout_ms.unwrap_or(30000)); - // Build a per-check client with connect_timeout = monitor timeout. - // This ensures the OS-level TCP connect is bounded, since tokio future - // cancellation alone cannot interrupt a kernel-level SYN wait. - let check_client = reqwest::Client::builder() - .user_agent("PingQL-Monitor/0.1") - .connect_timeout(timeout) - .timeout(timeout) - .build() - .unwrap_or_else(|_| client.clone()); + // Pre-flight TCP connect check with a hard OS-level timeout. + // This catches hosts where the SYN packet hangs indefinitely — + // reqwest/hyper with rustls cannot be cancelled via tokio future drop alone. + let url_parsed = reqwest::Url::parse(&monitor.url).ok(); + if let Some(ref u) = url_parsed { + let host = u.host_str().unwrap_or(""); + let port = u.port_or_known_default().unwrap_or(443); + let addr = format!("{host}:{port}"); + // Resolve DNS first + let addrs: Vec<_> = match tokio::net::lookup_host(&addr).await { + Ok(a) => a.collect(), + Err(e) => { + return PingResult { + monitor_id: monitor.id.clone(), + scheduled_at, + jitter_ms, + status_code: None, + latency_ms: Some(start.elapsed().as_millis() as u64), + up: false, + error: Some(format!("DNS error: {e}")), + cert_expiry_days: None, + meta: None, + }; + } + }; + // Try TCP connect with hard timeout + let tcp_result = tokio::time::timeout( + timeout, + tokio::net::TcpStream::connect(addrs.as_slice()), + ).await; + if let Err(_) | Ok(Err(_)) = tcp_result { + let err = match tcp_result { + Err(_) => format!("timed out after {}ms", timeout.as_millis()), + Ok(Err(e)) => e.to_string(), + _ => unreachable!(), + }; + return PingResult { + monitor_id: monitor.id.clone(), + scheduled_at, + jitter_ms, + status_code: None, + latency_ms: Some(start.elapsed().as_millis() as u64), + up: false, + error: Some(err), + cert_expiry_days: None, + meta: None, + }; + } + } let req_method = reqwest::Method::from_bytes(method.as_bytes()) .unwrap_or(reqwest::Method::GET); - let mut req = check_client.request(req_method, &monitor.url); + let mut req = client.request(req_method, &monitor.url).timeout(timeout); if let Some(headers) = &monitor.request_headers { for (k, v) in headers {